Commit

add twitch llm max tokens vs. story tokens
Allow setting the Twitch LLM max tokens separately from the main story generation.

Reduce the max tokens used for LLM story messages.
Chris Kennedy committed Mar 30, 2024
1 parent b20c18d commit 4d137e0
Showing 5 changed files with 43 additions and 21 deletions.
10 changes: 6 additions & 4 deletions scripts/twitch.sh
@@ -21,7 +21,7 @@ MODEL_ID=7b-it
 # Generic settings
 USE_API=1
 CHAT_FORMAT=chatml
-MAX_TOKENS=2000
+MAX_TOKENS=800
 TEMPERATURE=0.8
 CONTEXT_SIZE=8000
 QUANTIZED=0
@@ -30,15 +30,16 @@ SD_MAX_LENGTH=50
 ## Pipeline Settings
 DAEMON=1
 CONTINUOUS=1
-POLL_INTERVAL=1000
+POLL_INTERVAL=60000
 PIPELINE_CONCURRENCY=6
 ASYNC_CONCURRENCY=0
 NDI_TIMEOUT=600
 ## Twitch Chat Settings
 TWITCH_MODEL=mistral
 TWITCH_LLM_CONCURRENCY=1
 TWITCH_CHAT_HISTORY=16
-TWITCH_MAX_TOKENS=120
+TWITCH_MAX_TOKENS_CHAT=120
+TWITCH_MAX_TOKENS_LLM=500
 ## Stable Diffusion Settings
 SD_TEXT_MIN=70
 SD_WIDTH=512
@@ -106,7 +107,8 @@ DYLD_LIBRARY_PATH=`pwd`:/usr/local/lib:$DYLD_LIBRARY_PATH \
 --twitch-chat-history $TWITCH_CHAT_HISTORY \
 --twitch-llm-concurrency $TWITCH_LLM_CONCURRENCY \
 --twitch-model $TWITCH_MODEL \
---twitch-max-tokens $TWITCH_MAX_TOKENS \
+--twitch-max-tokens-chat $TWITCH_MAX_TOKENS_CHAT \
+--twitch-max-tokens-llm $TWITCH_MAX_TOKENS_LLM \
 --twitch-prompt "$TWITCH_PROMPT" \
 --mimic3-tts \
 $SD_API_CMD \
13 changes: 11 additions & 2 deletions src/args.rs
@@ -756,11 +756,20 @@ pub struct Args {
 /// Twitch Max Tokens - max tokens for LLM
 #[clap(
 long,
-env = "TWITCH_MAX_TOKENS",
+env = "TWITCH_MAX_TOKENS_CHAT",
 default_value_t = 150,
 help = "Twitch Max Tokens."
 )]
-pub twitch_max_tokens: usize,
+pub twitch_max_tokens_chat: usize,
+
+//// Twitch Max Tokens LLM - max tokens for LLM
+#[clap(
+long,
+env = "TWITCH_MAX_TOKENS_LLM",
+default_value_t = 150,
+help = "Twitch Max Tokens LLM."
+)]
+pub twitch_max_tokens_llm: usize,

 /// single concurrency - bool single concurrency for all models, wait between each request
 #[clap(
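As an aside, the two new options follow the same clap pattern as the rest of Args. Below is a minimal standalone sketch of how they resolve, assuming clap v4 with the "derive" and "env" features enabled (which the #[clap(...)] attributes above imply); the struct is trimmed to just these two fields and is not the repository's full Args:

use clap::Parser;

#[derive(Parser, Debug)]
struct TokenArgs {
    /// Max tokens for Twitch chat replies.
    #[clap(long, env = "TWITCH_MAX_TOKENS_CHAT", default_value_t = 150)]
    twitch_max_tokens_chat: usize,

    /// Max tokens for LLM stories triggered from Twitch chat.
    #[clap(long, env = "TWITCH_MAX_TOKENS_LLM", default_value_t = 150)]
    twitch_max_tokens_llm: usize,
}

fn main() {
    // Flags (--twitch-max-tokens-chat 120 --twitch-max-tokens-llm 500) or the
    // matching environment variables override the default of 150.
    let args = TokenArgs::parse();
    println!("chat cap = {}, llm cap = {}", args.twitch_max_tokens_chat, args.twitch_max_tokens_llm);
}

Clap resolves flag over environment variable over default, which is why twitch.sh can simply pass the two shell variables through as --twitch-max-tokens-chat and --twitch-max-tokens-llm.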
37 changes: 24 additions & 13 deletions src/main.rs
@@ -782,15 +782,6 @@ async fn main() {
 }
 }

-// Did not get a message from twitch, so don't process the query
-if !twitch_query && args.twitch_client {
-if !args.continuous {
-// sleep for a while to avoid busy loop
-tokio::time::sleep(Duration::from_millis(100)).await;
-continue;
-}
-}
-
 // break the loop if we are not running as a daemon or hit max iterations
 let rctrlc_clone = running_ctrlc.clone();
 if (!rctrlc_clone.load(Ordering::SeqCst)
@@ -880,8 +871,28 @@
 // Calculate elapsed time since last start
 let elapsed = poll_start_time.elapsed();

+let mut max_tokens = args.max_tokens as usize;
+
+// Did not get a message from twitch, so don't process the query
+if !twitch_query && args.twitch_client {
+if args.continuous {
+// only play a story after poll_interval_duration has passed, else continue
+if elapsed < poll_interval_duration {
+tokio::time::sleep(Duration::from_millis(100)).await;
+continue;
+}
+} else {
+// sleep for a while to avoid busy loop
+tokio::time::sleep(Duration::from_millis(100)).await;
+continue;
+}
+} else if args.twitch_client && twitch_query {
+// reset the max tokens
+max_tokens = args.twitch_max_tokens_llm;
+}
+
 // Sleep only if the elapsed time is less than the poll interval
-if !twitch_query
+if !args.twitch_client
 && iterations > 0
 && !args.interactive
 && (args.daemon || args.max_iterations > 1)
@@ -1148,7 +1159,7 @@
 tokio::spawn(async move {
 let open_ai_request = OpenAIRequest {
 model: &model_clone,
-max_tokens: &args.max_tokens,
+max_tokens: &max_tokens,
 messages: messages_clone,
 temperature: &args.temperature,
 top_p: &args.top_p,
@@ -1173,7 +1184,7 @@
 let mistral_clone = mistral.clone();
 if let Err(e) = mistral_clone(
 prompt_clone,
-args.max_tokens as usize,
+max_tokens as usize,
 args.temperature as f64,
 args.quantized,
 Some(model_id),
@@ -1187,7 +1198,7 @@
 let gemma_clone = gemma.clone();
 if let Err(e) = gemma_clone(
 prompt_clone,
-args.max_tokens as usize,
+max_tokens as usize,
 args.temperature as f64,
 args.quantized,
 Some(model_id),
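The heart of the main.rs change, as a simplified synchronous sketch (hypothetical helper name; the real code lives inline in the event loop and sleeps then continues rather than returning): when the Twitch client is enabled and no chat message is pending, the iteration is skipped until the poll interval has elapsed (or indefinitely in non-continuous mode), and a story that was triggered by a Twitch message is capped at --twitch-max-tokens-llm instead of the global --max-tokens:

use std::time::Duration;

/// Returns Some(max_tokens) when the iteration should run, None when it should idle.
fn gate_iteration(
    twitch_client: bool,          // args.twitch_client
    twitch_query: bool,           // did a Twitch chat message trigger this iteration?
    continuous: bool,             // args.continuous
    elapsed: Duration,            // time since the last poll started
    poll_interval: Duration,      // POLL_INTERVAL from twitch.sh
    default_max_tokens: usize,    // args.max_tokens
    twitch_llm_max_tokens: usize, // args.twitch_max_tokens_llm
) -> Option<usize> {
    if twitch_client && !twitch_query {
        // No chat message: in continuous mode wait out the poll interval,
        // otherwise idle until a message arrives.
        if !continuous || elapsed < poll_interval {
            return None;
        }
        Some(default_max_tokens)
    } else if twitch_client && twitch_query {
        // Twitch-triggered story: use the smaller LLM cap.
        Some(twitch_llm_max_tokens)
    } else {
        Some(default_max_tokens)
    }
}

fn main() {
    // A Twitch-triggered story is capped at the LLM limit (e.g. 500 instead of 800).
    assert_eq!(
        gate_iteration(true, true, true, Duration::from_secs(5), Duration::from_secs(60), 800, 500),
        Some(500)
    );
    // No message yet and the poll interval has not elapsed: skip this iteration.
    assert_eq!(
        gate_iteration(true, false, true, Duration::from_secs(5), Duration::from_secs(60), 800, 500),
        None
    );
    println!("gating sketch ok");
}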
2 changes: 1 addition & 1 deletion src/openai_api.rs
@@ -21,7 +21,7 @@ pub struct Message {
 pub struct OpenAIRequest<'a> {
 pub model: &'a str,
 pub messages: Vec<Message>,
-pub max_tokens: &'a i32, // add this field to the request struct
+pub max_tokens: &'a usize, // add this field to the request struct
 pub temperature: &'a f32, // add this field to the request struct
 pub top_p: &'a f32, // add this field to the request struct
 pub presence_penalty: &'a f32, // add this field to the request struct
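This type change just keeps the request field in step with main.rs, where the chosen max_tokens is now a usize local that the request borrows. A small sketch of the idea, assuming the real struct derives serde's Serialize (as an API request body normally would) and that serde_json is available for illustration; the fields are trimmed:

use serde::Serialize;

#[derive(Serialize)]
struct OpenAIRequest<'a> {
    model: &'a str,
    max_tokens: &'a usize, // was &'a i32; now borrows the usize picked in main.rs
    temperature: &'a f32,
}

fn main() {
    let max_tokens: usize = 500; // e.g. args.twitch_max_tokens_llm
    let temperature: f32 = 0.8;
    let request = OpenAIRequest {
        model: "mistral",
        max_tokens: &max_tokens,
        temperature: &temperature,
    };
    // A borrowed integer serializes the same as the value itself, so the JSON
    // body sent to the API is unchanged by the i32 -> usize switch.
    println!("{}", serde_json::to_string(&request).unwrap());
}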
2 changes: 1 addition & 1 deletion src/twitch_client.rs
@@ -97,7 +97,7 @@ async fn on_msg(
 if !msg.text().starts_with("!help") && !msg.text().starts_with("!message") {
 // LLM Thread
 let (external_sender, mut external_receiver) = tokio::sync::mpsc::channel::<String>(100);
-let max_tokens = args.twitch_max_tokens;
+let max_tokens = args.twitch_max_tokens_chat;
 let temperature = 0.8;
 let quantized = false;
 let max_messages = args.twitch_chat_history;
