feat: server mode
pond918 committed Jul 11, 2023
1 parent 64a720a commit 5be901a
Showing 10 changed files with 97 additions and 26 deletions.
1 change: 1 addition & 0 deletions .eslintrc.cjs
@@ -13,5 +13,6 @@ module.exports = {
     '@typescript-eslint/no-dupe-class-members': ['error'],
     '@typescript-eslint/no-useless-constructor': ['error'],
     '@typescript-eslint/no-inferrable-types': ['off'],
+    '@typescript-eslint/no-explicit-any': 'off',
   },
 }
2 changes: 1 addition & 1 deletion package.json
@@ -3,7 +3,7 @@
   "types": "dist/index.d.ts",
   "main": "dist/index.js",
   "description": "extendable llm bots sdk to integrate with any system, with tree structured history and conversations support. you can easily add your own bots under the hood.",
-  "version": "0.0.1006",
+  "version": "0.0.2",
   "keywords": [
     "llm",
     "chatbot",
47 changes: 42 additions & 5 deletions src/bots/LangChainBot.ts
@@ -1,16 +1,18 @@
 import { ChatOpenAI } from 'langchain/chat_models/openai'
-import { AIChatMessage, HumanChatMessage, SystemChatMessage } from 'langchain/schema'
+import { AIChatMessage, HumanChatMessage, LLMResult, SystemChatMessage } from 'langchain/schema'
 import { LLMBot, LLMServerType } from './base-bot'
 import { ChatDto } from './chat.dto'
 
 export default abstract class LangChainBot extends LLMBot {
   protected _chatModel!: ChatOpenAI
+  /** if stream = true, token usage not available for azure-gpt */
+  protected _usage = true
 
   constructor(protected readonly _brandId = 'langChainBot', outputFormat = 'markdown') {
     super(_brandId, outputFormat)
   }
 
-  async _sendPrompt(msg: ChatDto, streamCallback?: (msg: ChatDto) => void): Promise<ChatDto> {
+  async _sendPrompt(msg: ChatDto): Promise<ChatDto> {
     // Convert the messages to the correct format
     const messages =
       msg.options.__history?.map(m => {
@@ -25,12 +27,47 @@ export default abstract class LangChainBot extends LLMBot {
       {
         handleLLMNewToken(token: string) {
           if (token) res += token
-          streamCallback && streamCallback(new ChatDto(res, token ? -1 : 0))
+          msg.options?.stream && msg.options.stream(new ChatDto(res, -1))
+        },
+        handleLLMEnd: (val: LLMResult) => {
+          res || (res = val.generations[0][0].text) // TODO
+          const tokens = val.llmOutput?.tokenUsage?.totalTokens
+          if (this._usage) {
+            if (!tokens) {
+              msg.statusCode = 500
+              throw new Error((msg.message = 'LLM token usage should not be empty'))
+            }
+            msg.options.tokens = tokens
+            msg.options.quotaTokens = (msg.options.quotaTokens || 0) - tokens
+          }
+          msg.options?.stream && msg.options.stream(new ChatDto(res, 0))
         },
       },
     ]
-    await this._chatModel.call(messages, undefined, callbacks)
-    return new ChatDto(res)
+
+    const { streaming, temperature, n, topP, maxTokens } = this._chatModel
+
+    try {
+      this._chatModel.streaming = !this._usage && !!msg.options.stream
+      msg.options?.n && (this._chatModel.n = msg.options.n)
+      msg.options?.topP && (this._chatModel.topP = msg.options.topP)
+      msg.options?.modelName && (this._chatModel.modelName = msg.options.modelName)
+      msg.options?.temperature && (this._chatModel.temperature = msg.options.temperature)
+      msg.options?.maxTokens &&
+        (this._chatModel.maxTokens = msg.options.quotaTokens
+          ? Math.min(msg.options.quotaTokens, msg.options.maxTokens)
+          : msg.options.maxTokens)
+
+      await this._chatModel.call(messages, undefined, callbacks)
+    } finally {
+      for (const [p, v] of Object.entries({ streaming, temperature, n, topP, maxTokens }))
+        (this._chatModel as any)[p] = v
+    }
+    const resp = new ChatDto(res, msg.statusCode)
+    msg.options.tokens &&
+      ([resp.options.tokens, resp.options.quotaTokens] = [msg.options.tokens, msg.options.quotaTokens])
+    msg.message && (resp.message = msg.message)
+    return resp
   }
 
   _getServerType(): LLMServerType {
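The try/finally above is a save/override/restore pattern: per-message options temporarily reconfigure the shared ChatOpenAI instance, and the snapshot taken before the call is restored even if the call throws; note that maxTokens is additionally capped by the remaining quotaTokens. A condensed sketch of the same pattern, assuming a `model` object with the same tunables (not a drop-in for the code above):

    // snapshot the shared model's settings before mutating them
    const saved = {
      streaming: model.streaming,
      temperature: model.temperature,
      n: model.n,
      topP: model.topP,
      maxTokens: model.maxTokens,
    }
    try {
      // apply per-message overrides from msg.options, then call the model
    } finally {
      Object.assign(model, saved) // always restore the shared instance
    }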
8 changes: 4 additions & 4 deletions src/bots/base-bot.ts
@@ -79,12 +79,12 @@ export abstract class LLMBot {
     await this._userStorage.set('_isAvailable', v)
   }
 
-  async sendPrompt(msg: ChatDto, streamCallback?: (msg: ChatDto) => void): Promise<ChatDto> {
+  async sendPrompt(msg: ChatDto): Promise<ChatDto> {
     if (!msg.text) return new ChatDto('')
 
     if (!(await this.isAvailable())) {
       const msg = new ChatDto('bot.notAvailable', 404)
-      streamCallback && streamCallback(msg)
+      msg.options?.stream && msg.options.stream(msg)
       return msg
     }
 
@@ -109,7 +109,7 @@ export abstract class LLMBot {
       await this._setConversation(msg.options._conversationKey)
     }
 
-    return this._sendPrompt(msg, streamCallback).then(async resp => {
+    return this._sendPrompt(msg).then(async resp => {
       // store response msg into history
       resp.options.lastMsgId = msg.id
       resp.options.type = 'ai'
@@ -122,7 +122,7 @@ export abstract class LLMBot {
    * @param msg prompt msg, or whole chat thread.
    * @param streamCallback
    */
-  abstract _sendPrompt(msg: ChatDto, streamCallback?: (msg: ChatDto) => void): Promise<ChatDto>
+  abstract _sendPrompt(msg: ChatDto): Promise<ChatDto>
 
   /**
    * @returns the LLM server type:
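For callers, the stream callback now travels inside the message instead of as a second argument. A before/after sketch (`bot` and `render` are placeholders):

    // before this commit: callback passed alongside the message
    const resp = await bot.sendPrompt(msg, m => render(m))

    // after: callback carried in the message options
    msg.options.stream = m => render(m)
    const resp = await bot.sendPrompt(msg)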
18 changes: 16 additions & 2 deletions src/bots/chat.dto.ts
@@ -21,12 +21,26 @@ export class ChatDto {
       lastMsgId?: string
       /** conversation key from llm server */
       _conversationKey?: string
-      /** TODO approximately max words of new response */
-      maxResponse?: number
       /** if true, this msg & its response will not be stored into history */
       stateless?: boolean
       /** history of this msg */
       __history?: ChatDto[]
+
+      //// llm props
+      /** stream callback */
+      stream?: (msg: ChatDto) => void
+      modelName?: string
+      temperature?: number
+      /** Number of completions to generate for each prompt */
+      n?: number
+      /** Total probability mass of tokens to consider at each step */
+      topP?: number
+      /** respond token usage for current QA */
+      tokens?: number
+      /** Maximum number of tokens to generate in the completion. -1 returns as many tokens as possible given the prompt and the model's maximum context size. */
+      maxTokens?: number
+      /** quota number of tokens, will be updated in LLM response. */
+      quotaTokens?: number
     } & Record<string, unknown> = {},
   ) {
     statusCode ? (this.statusCode = statusCode) : (this.id = nanoid())
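Taken together, the new options allow a call shaped like the following sketch (values are illustrative; `bot` is any LLMBot instance):

    const msg = new ChatDto('Summarize the README in one sentence.')
    msg.options.temperature = 0.3
    msg.options.maxTokens = 256    // capped by quotaTokens when both are set
    msg.options.quotaTokens = 1000 // remaining budget; decremented by the reply's usage
    msg.options.stream = m => process.stdout.write(m.text ?? '')
    const resp = await bot.sendPrompt(msg)
    // resp.options.tokens / resp.options.quotaTokens report the accounting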
24 changes: 15 additions & 9 deletions src/bots/huggingface/GradioBot.ts
@@ -52,11 +52,11 @@ export default abstract class GradioBot extends LLMBot {
     return available
   }
 
-  async _sendPrompt(prompt: ChatDto, streamCallback?: (msg: ChatDto) => void): Promise<ChatDto> {
+  async _sendPrompt(msg: ChatDto): Promise<ChatDto> {
     let result: ChatDto = new ChatDto('', -1)
     for (const key in this._fnIndexes) {
       const fn_index = this._fnIndexes[key]
-      const resp = await this._sendFnIndex(fn_index, prompt, streamCallback)
+      const resp = await this._sendFnIndex(fn_index, msg, msg.options?.stream)
       resp && !resp.statusCode && resp.text && (result = resp)
     }
     this._formalizeResponse(result)
@@ -115,13 +115,18 @@ export default abstract class GradioBot extends LLMBot {
       } else if (event.msg === 'process_completed') {
         // Done
         if (event.success && event.output.data) {
-          const prompt = this.parseData(fn_index, event.output.data)
-          const resp = new ChatDto(
-            prompt,
-            fn_index == this._fnIndexes.at(-1) ? 0 : -1, // Only the last one is done
-          )
-          streamCallback && streamCallback(resp)
-          resolve(resp)
+          if (typeof event.output.data[2] !== 'string' || event.output.data[2] === '') {
+            const prompt = this.parseData(fn_index, event.output.data)
+            const resp = new ChatDto(
+              prompt,
+              fn_index == this._fnIndexes.at(-1) ? 0 : -1, // Only the last one is done
+            )
+            streamCallback && streamCallback(resp)
+            resolve(resp)
+          } else {
+            const errorMsg = this.parseError(event.output.data[2])
+            reject(new Error(errorMsg))
+          }
         } else {
           reject(new Error(event.output.error))
         }
@@ -163,6 +168,7 @@ export default abstract class GradioBot extends LLMBot {
     }
   }
 
+  abstract parseError(error: string): string
   abstract makeData(fn_index: number, prompt: ChatDto): unknown
   abstract parseData(fn_index: number, data: unknown): string
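The new guard reads the third element of event.output.data as an error channel: a non-empty string there is routed through the new parseError hook and rejects the promise; anything else falls through to the normal parseData path. The implied payload shapes (inferred from the guard, not from Gradio documentation):

    // resolves: data[2] missing, non-string, or empty
    //   { msg: 'process_completed', success: true, output: { data: [..., ''] } }
    // rejects:  data[2] is a non-empty error string
    //   { msg: 'process_completed', success: true, output: { data: [..., 'REFRESH THIS PAGE ...'] } }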
14 changes: 12 additions & 2 deletions src/bots/lmsys/LMSYSBot.ts
@@ -14,7 +14,7 @@ export default class LMSYSBot extends GradioBot {
   _lock = new AsyncLock() // FIXME Send requests in queue to save LMSYS
 
   constructor(name: string, readonly _model: string) {
-    super(name, 'https://chat.lmsys.org/', [7, 8], 'html')
+    super(name, 'https://chat.lmsys.org/', [9, 10], 'html')
   }
 
   /** needn't token */
@@ -42,6 +42,16 @@ export default class LMSYSBot extends GradioBot {
     if (fn_index === this._fnIndexes[1]) {
       r = data[1].at(-1)[1]
     }
-    return r
+    return r || ''
   }
+
+  parseError(errorMsg: string) {
+    if (errorMsg.includes('REFRESH THIS PAGE')) {
+      errorMsg = errorMsg.replace(
+        'REFRESH THIS PAGE',
+        `<a href="${this._loginUrl}" target="innerWindow">REFRESH THIS PAGE</a>`,
+      )
+    }
+    return errorMsg
+  }
 }
5 changes: 4 additions & 1 deletion src/bots/microsoft/AzureOpenAIAPIBot.ts
@@ -10,14 +10,17 @@ export default class AzureOpenAIAPIBot extends LangChainBot {
     const conf: Record<string, any> = typeof config == 'function' ? (config = config()) : config
     if (!conf?.azureApiKey) return false
 
+    this._usage = !conf.no_usage_report // if stream = true, token usage not available
     this._chatModel = new ChatOpenAI({
       modelName: conf.modelName,
       azureOpenAIApiKey: conf.azureApiKey,
       azureOpenAIApiInstanceName: conf.azureApiInstanceName,
       azureOpenAIApiDeploymentName: conf.azureOpenAIApiDeploymentName,
       azureOpenAIApiVersion: conf.azureOpenAIApiVersion,
       temperature: conf.temperature,
-      streaming: true,
+      streaming: false,
     })
+    // curl -X POST -H 'Content-type: application/json' -H 'User-Agent: OpenAI/NodeJS/3.3.0' -H 'api-key: c30fe7f14b464d52b515f77148643d60' -H 'Authorization: Bearer undefined' --data '{"model":"gpt-3.5-turbo","temperature":0.7,"top_p":1,"frequency_penalty":0,"presence_penalty":0,"n":1,"stream":false,"messages":[{"role":"user","content":"!"}]}' https://openai-kanjian.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions\?api-version=2023-05-15
 
     return !!this._chatModel
   }
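For reference, the config fields this bot reads, collected from the hunk above into one object (values are placeholders; the name of the method receiving `config` is not shown in this diff):

    const conf = {
      azureApiKey: '<azure-openai-api-key>',
      azureApiInstanceName: '<instance-name>',
      azureOpenAIApiDeploymentName: '<deployment-name>',
      azureOpenAIApiVersion: '2023-05-15',
      modelName: '<model-name>',
      temperature: 0.7,
      no_usage_report: false, // false keeps token accounting on, which forces non-streaming calls
    }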
2 changes: 1 addition & 1 deletion src/storage/chat-history.ts
@@ -37,7 +37,7 @@ export class ChatHistory {
 
     if (!msg.options.stateless && parent.options.leaf) {
       delete parent.options.leaf
-      await this._storage.set(msg.id as string, [preId, parent])
+      await this._storage.set(parent.id as string, [preId, parent])
     }
   } else if (pid !== '') {
     // append to current conversation
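This one-liner is a real bug fix, assuming the storage layout implied by the call site (key = message id, value = [previousMessageId, message]):

    // before: the updated parent (with its `leaf` flag removed) was written
    //         under msg.id, clobbering the child's own history entry
    // after:  the parent record is rewritten under parent.id, leaving the
    //         child's entry intact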
2 changes: 1 addition & 1 deletion test/llmbots.e2e.spec.ts
@@ -17,7 +17,7 @@ describe('builtin LLMBots: vicuna-13b (e2e)', () => {
 
     // contextual conversation
     const req = new ChatDto("What's his wife's full name. reply 5 words most")
-    const resp = await claudeBot?.sendPrompt(req, msg => console.log(msg))
+    const resp = await claudeBot?.sendPrompt(req)
     console.log(resp)
     expect(resp?.text).not.toBeNull()
     expect(resp?.options.lastMsgId).toEqual(req.id)
