From bbd51f29eb23804fc54facdf89270c1c5aab4fcd Mon Sep 17 00:00:00 2001 From: Webifi Date: Fri, 8 Sep 2023 10:56:05 -0500 Subject: [PATCH 1/3] Default to StableBeluga2 for Petals --- src/lib/Settings.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/Settings.svelte b/src/lib/Settings.svelte index 15a0594..2cdf58c 100644 --- a/src/lib/Settings.svelte +++ b/src/lib/Settings.svelte @@ -21,7 +21,7 @@ import { import { getModelDetail, getTokens } from './Models.svelte' const defaultModel:Model = 'gpt-3.5-turbo' -const defaultModelPetals:Model = 'meta-llama/Llama-2-70b-chat-hf' +const defaultModelPetals:Model = 'stabilityai/StableBeluga2' export const getDefaultModel = (): Model => { if (!get(apiKeyStorage)) return defaultModelPetals From 819faddb38351bc28ebf482241200baef5c7de48 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sun, 10 Sep 2023 19:03:00 -0500 Subject: [PATCH 2/3] Fix incorrect URL encoding --- src/lib/ChatSettingsModal.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/ChatSettingsModal.svelte b/src/lib/ChatSettingsModal.svelte index be98ff9..68221d2 100644 --- a/src/lib/ChatSettingsModal.svelte +++ b/src/lib/ChatSettingsModal.svelte @@ -102,8 +102,8 @@ // location.protocol + '//' + location.host + location.pathname const uri = '#/chat/new?petals=true&' + Object.entries(chatSettings).reduce((a, [k, v]) => { const t = typeof v - if (hasChatSetting(k) && (t === 'boolean' || t === 'string' || t === 'number')) { - a.push(encodeURI(k) + '=' + encodeURI(v as any)) + if (hasChatSetting(k as any) && (t === 'boolean' || t === 'string' || t === 'number')) { + a.push(encodeURIComponent(k) + '=' + encodeURIComponent(v as any)) } return a }, [] as string[]).join('&') From e3095bdf41fce86ca1fd44f4fc9840866125c9d3 Mon Sep 17 00:00:00 2001 From: Webifi Date: Sun, 10 Sep 2023 19:32:33 -0500 Subject: [PATCH 3/3] Fix some types --- src/lib/providers/openai/models.svelte | 2 +- src/lib/providers/petals/models.svelte | 10 ++++++++-- src/lib/providers/petals/request.svelte | 15 +++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/lib/providers/openai/models.svelte b/src/lib/providers/openai/models.svelte index c46fbe5..2e801ae 100644 --- a/src/lib/providers/openai/models.svelte +++ b/src/lib/providers/openai/models.svelte @@ -23,7 +23,7 @@ const hiddenSettings = { repetitionPenalty: true, holdSocket: true // leadPrompt: true -} +} as any const chatModelBase = { type: 'chat', diff --git a/src/lib/providers/petals/models.svelte b/src/lib/providers/petals/models.svelte index 44467c7..5e3ef96 100644 --- a/src/lib/providers/petals/models.svelte +++ b/src/lib/providers/petals/models.svelte @@ -14,7 +14,7 @@ const hideSettings = { n: true, presence_penalty: true, frequency_penalty: true -} +} as any const chatModelBase = { type: 'instruct', // Used for chat, but these models operate like instruct models -- you have to manually structure the messages sent to them @@ -85,8 +85,14 @@ export const chatModels : Record = { }, 'stabilityai/StableBeluga2': { ...chatModelBase, - label: 'Petals - StableBeluga-2' + label: 'Petals - StableBeluga-2-70b' } + // 'tiiuae/falcon-180B-chat': { + // ...chatModelBase, + // start: '###', + // stop: ['###', '', '<|endoftext|>'], + // label: 'Petals - Falcon-180b-chat' + // } } \ No newline at end of file diff --git a/src/lib/providers/petals/request.svelte b/src/lib/providers/petals/request.svelte index 33eb7be..70c777d 100644 --- a/src/lib/providers/petals/request.svelte +++ b/src/lib/providers/petals/request.svelte @@ -70,13 +70,15 @@ export const chatRequest = async ( stopSequences = stopSequences.sort((a, b) => b.length - a.length) const stopSequencesC = stopSequences.filter(s => s !== stopSequence) const maxTokens = getModelMaxTokens(model) + const userAfterSystem = true // Enforce strict order of messages const fMessages = (request.messages || [] as Message[]) const rMessages = fMessages.reduce((a, m, i) => { a.push(m) + // if (m.role === 'system') m.content = m.content.trim() const nm = fMessages[i + 1] - if (m.role === 'system' && (!nm || nm.role !== 'user')) { + if (userAfterSystem && m.role === 'system' && (!nm || nm.role !== 'user')) { const nc = { role: 'user', content: '' @@ -97,7 +99,7 @@ export const chatRequest = async ( const buildMessage = (m: Message): string => { return getRoleTag(m.role, model, chat) + m.content + getRoleEnd(m.role, model, chat) } - const buildInputArray = (a) => { + const buildInputArray = (a: Message[]) => { return a.reduce((a, m, i) => { let c = buildMessage(m) let replace = false @@ -141,7 +143,7 @@ export const chatRequest = async ( } // const inputArray = buildInputArray(rMessages).map(m => m.content) const lInputArray = doLead - ? buildInputArray(rMessages.slice(0, -1)).map(m => m.content) + ? (rMessages.length > 1 ? buildInputArray(rMessages.slice(0, -1)).map(m => m.content) : []) : buildInputArray(rMessages.slice()).map(m => m.content) const nInputArray = buildInputArray(rMessages.slice(-1)).map(m => m.content) const leadPrompt = (leadPromptSequence && doLead) ? delimiter + leadPromptSequence : '' @@ -194,7 +196,7 @@ export const chatRequest = async ( throw err } // console.warn('got new ws') - inputPrompt = lastPrompt + (doLead ? delimiter : '') + inputPrompt = lastPrompt + (doLead && lInputArray.length ? delimiter : '') providerData.knownBuffer = '' providerData.ws = nws resolve(nws) @@ -217,11 +219,12 @@ export const chatRequest = async ( } // update with real count chatResponse.setPromptTokenCount(promptTokenCount) - nws.send(JSON.stringify({ + const req = { type: 'open_inference_session', model, max_length: chatSettings.holdSocket ? maxTokens : maxLen - })) + } as any + nws.send(JSON.stringify(req)) } })