diff --git a/src/lib/ChatRequestPetals.svelte b/src/lib/ChatRequestPetals.svelte
index 9d72ebe..f7e7833 100644
--- a/src/lib/ChatRequestPetals.svelte
+++ b/src/lib/ChatRequestPetals.svelte
@@ -25,10 +25,16 @@ export const runPetalsCompletionRequest = async (
     ws.close()
   }
   signal.addEventListener('abort', abortListener)
-  const stopSequences = modelDetail.stop || ['###']
+  const stopSequences = (modelDetail.stop || ['###', '</s>']).slice()
   const stopSequence = getStopSequence(chat)
-  const stopSequencesC = stopSequences.slice()
-  if (stopSequence === stopSequencesC[0]) stopSequencesC.shift()
+  let stopSequenceC = stopSequence
+  if (stopSequence !== '###') {
+    stopSequences.push(stopSequence)
+    stopSequenceC = '</s>'
+  }
+  const stopSequencesC = stopSequences.filter((ss) => {
+    return ss !== '###' && ss !== stopSequenceC
+  })
   const maxTokens = getModelMaxTokens(model)
   let maxLen = Math.min(opts.maxTokens || chatRequest.chat.max_tokens || maxTokens, maxTokens)
   const promptTokenCount = chatResponse.getPromptTokenCount()
@@ -36,6 +42,16 @@ export const runPetalsCompletionRequest = async (
     maxLen = Math.min(maxLen + promptTokenCount, maxTokens)
   }
   chatResponse.onFinish(() => {
+    const message = chatResponse.getMessages()[0]
+    if (message) {
+      for (let i = 0, l = stopSequences.length; i < l; i++) {
+        const ss = stopSequences[i].trim()
+        if (message.content.trim().endsWith(ss)) {
+          message.content = message.content.trim().slice(0, message.content.trim().length - ss.length)
+          updateMessages(chat.id)
+        }
+      }
+    }
     chatRequest.updating = false
     chatRequest.updatingMessage = ''
   })
@@ -55,8 +71,8 @@ export const runPetalsCompletionRequest = async (
       }
       const rMessages = request.messages || [] as Message[]
       // make sure top_p and temperature are set the way we need
-      let temperature = request.temperature || 0
-      if (isNaN(temperature as any)) temperature = 1
+      let temperature = request.temperature
+      if (temperature === undefined || isNaN(temperature as any)) temperature = 1
       if (!temperature || temperature <= 0) temperature = 0.01
       let topP = request.top_p
       if (topP === undefined || isNaN(topP as any)) topP = 1
@@ -64,7 +80,7 @@
       // build the message array
       const inputArray = (rMessages).reduce((a, m) => {
         const c = getRoleTag(m.role, model, chatRequest.chat) + m.content
-        a.push(c)
+        a.push(c.trim())
         return a
       }, [] as string[])
       const lastMessage = rMessages[rMessages.length - 1]
@@ -75,12 +91,12 @@
        type: 'generate',
        inputs: inputArray.join(stopSequence),
        max_new_tokens: 1, // wait for up to 1 tokens before displaying
-        stop_sequence: stopSequence,
+        stop_sequence: stopSequenceC,
        do_sample: 1, // enable top p and the like
        temperature,
-        top_p: topP,
-        extra_stop_sequences: stopSequencesC
-      }
+        top_p: topP
+      } as any
+      if (stopSequencesC.length) petalsRequest.extra_stop_sequences = stopSequencesC
       ws.send(JSON.stringify(petalsRequest))
       ws.onmessage = event => {
         // Remove updating indicator
@@ -106,17 +122,6 @@
             }]
           } as any
         )
-        if (response.stop) {
-          const message = chatResponse.getMessages()[0]
-          if (message) {
-            for (let i = 0, l = stopSequences.length; i < l; i++) {
-              if (message.content.endsWith(stopSequences[i])) {
-                message.content = message.content.slice(0, message.content.length - stopSequences[i].length)
-                updateMessages(chat.id)
-              }
-            }
-          }
-        }
       }, 1)
     }
   }
diff --git a/src/lib/Models.svelte b/src/lib/Models.svelte
index b879b9e..f4ca2c1 100644
--- a/src/lib/Models.svelte
+++ b/src/lib/Models.svelte
@@ -42,27 +42,49 @@ const modelDetails : Record<string, ModelDetail> = {
     completion: 0.000004, // $0.004 per 1000 tokens completion
     max: 16384 // 16k max token buffer
   },
+  'enoch/llama-65b-hf': {
+    type: 'Petals',
+    label: 'Petals - Llama-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
+  'timdettmers/guanaco-65b': {
+    type: 'Petals',
+    label: 'Petals - Guanaco-65b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
+    systemStart: '',
+    prompt: 0.000000, // $0.000 per 1000 tokens prompt
+    completion: 0.000000, // $0.000 per 1000 tokens completion
+    max: 2048 // 2k max token buffer
+  },
   'meta-llama/Llama-2-70b-chat-hf': {
     type: 'Petals',
     label: 'Petals - Llama-2-70b-chat',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
     max: 4096 // 4k max token buffer
   },
-  'timdettmers/guanaco-65b': {
+  'meta-llama/Llama-2-70b-hf': {
     type: 'Petals',
-    label: 'Petals - guanaco-65b',
-    stop: ['</s>'],
-    userStart: '[user]',
-    assistantStart: '[[[CHARACTER_NAME]]]',
+    label: 'Petals - Llama-2-70b',
+    stop: ['###', '</s>'],
+    userStart: '<|user|>',
+    assistantStart: '<|[[CHARACTER_NAME]]|>',
     systemStart: '',
     prompt: 0.000000, // $0.000 per 1000 tokens prompt
     completion: 0.000000, // $0.000 per 1000 tokens completion
-    max: 2048 // 2k max token buffer
+    max: 4096 // 4k max token buffer
   }
 }
 
@@ -107,8 +129,10 @@ export const supportedModels : Record<string, ModelDetail> = {
   'gpt-4-32k': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0314': modelDetails['gpt-4-32k'],
   'gpt-4-32k-0613': modelDetails['gpt-4-32k'],
+  'enoch/llama-65b-hf': modelDetails['enoch/llama-65b-hf'],
+  'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b'],
+  'meta-llama/Llama-2-70b-hf': modelDetails['meta-llama/Llama-2-70b-hf'],
   'meta-llama/Llama-2-70b-chat-hf': modelDetails['meta-llama/Llama-2-70b-chat-hf']
-  // 'timdettmers/guanaco-65b': modelDetails['timdettmers/guanaco-65b']
 }
 
 const lookupList = {
@@ -154,27 +178,27 @@ export const getEndpoint = (model: Model): string => {
 }
 
 export const getStopSequence = (chat: Chat): string => {
-  return valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
+  return chat.settings.stopSequence || valueOf(chat.id, getChatSettingObjectByKey('stopSequence').placeholder)
 }
 
 export const getUserStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
+    chat.settings.userMessageStart || valueOf(chat.id, getChatSettingObjectByKey('userMessageStart').placeholder)
   )
 }
 
 export const getAssistantStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
+    chat.settings.assistantMessageStart || valueOf(chat.id, getChatSettingObjectByKey('assistantMessageStart').placeholder)
   )
 }
 
 export const getSystemStart = (chat: Chat): string => {
   return mergeProfileFields(
     chat.settings,
-    valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
+    chat.settings.systemMessageStart || valueOf(chat.id, getChatSettingObjectByKey('systemMessageStart').placeholder)
   )
 }