Compare commits

..

8 Commits

Author SHA1 Message Date
Enrico Ros 13f502bd76 1.7.1: Release (Ollama chat). #270 2023-12-10 22:17:35 -08:00
Enrico Ros 11055b12ca Ollama: use the new Chat endpoint. Closes #270 2023-12-10 22:12:51 -08:00
Enrico Ros d0ea96eec0 Ollama: Admin: optional sort by Pulls, and UI link to the Model page 2023-12-10 22:03:55 -08:00
Enrico Ros 02eafc03f1 Ollama: update models, and sort by Featured 2023-12-10 22:01:50 -08:00
Enrico Ros 33d07a0313 Ollama: update documentation 2023-12-10 21:30:30 -08:00
Enrico Ros 763b852148 Ollama: administration: external link 2023-12-10 20:24:20 -08:00
Enrico Ros d5b0617fd7 Comment for now 2023-12-10 06:14:49 -08:00
Enrico Ros e3ce83674c Update Ollama 2023-12-10 06:09:54 -08:00
13 changed files with 243 additions and 113 deletions
+2 -1
View File
@@ -21,7 +21,7 @@ shows the current developments and future ideas.
- Got a suggestion? [_Add your roadmap ideas_](https://github.com/enricoros/big-agi/issues/new?&template=roadmap-request.md)
- Want to contribute? [_Pick up a task!_](https://github.com/users/enricoros/projects/4/views/4) - _easy_ to _pro_
### What's New in 1.7.0 · Dec 10, 2023 · Attachment Theory 🌟
### What's New in 1.7.1 · Dec 11, 2023 · Attachment Theory 🌟
- **Attachments System Overhaul**: Drag, paste, link, snap, text, images, PDFs and more. [#251](https://github.com/enricoros/big-agi/issues/251)
- **Desktop Webcam Capture**: Image capture now available as Labs feature. [#253](https://github.com/enricoros/big-agi/issues/253)
@@ -31,6 +31,7 @@ shows the current developments and future ideas.
- Optimized Voice Input and Performance
- Latest Ollama and Oobabooga models
- For developers: **Password Protection**: HTTP Basic Auth. [Learn How](https://github.com/enricoros/big-agi/blob/main/docs/deploy-authentication.md)
- [1.7.1]: Improved Ollama chats. [#270](https://github.com/enricoros/big-agi/issues/270)
### What's New in 1.6.0 - Nov 28, 2023
+2 -1
View File
@@ -10,7 +10,7 @@ by release.
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
- milestone: [1.8.0](https://github.com/enricoros/big-agi/milestone/8)
### What's New in 1.7.0 · Dec 10, 2023 · Attachment Theory 🌟
### What's New in 1.7.1 · Dec 11, 2023 · Attachment Theory 🌟
- **Attachments System Overhaul**: Drag, paste, link, snap, text, images, PDFs and more. [#251](https://github.com/enricoros/big-agi/issues/251)
- **Desktop Webcam Capture**: Image capture now available as Labs feature. [#253](https://github.com/enricoros/big-agi/issues/253)
@@ -20,6 +20,7 @@ by release.
- Optimized Voice Input and Performance
- Latest Ollama and Oobabooga models
- For developers: **Password Protection**: HTTP Basic Auth. [Learn How](https://github.com/enricoros/big-agi/blob/main/docs/deploy-authentication.md)
- [1.7.1]: Improved Ollama chats. [#270](https://github.com/enricoros/big-agi/issues/270)
### What's New in 1.6.0 - Nov 28, 2023 · Surf's Up
+10 -5
View File
@@ -5,15 +5,20 @@ This guide helps you connect [Ollama](https://ollama.ai) [models](https://ollama
experience. The integration brings the popular big-AGI features to Ollama, including: voice chats,
editing tools, models switching, personas, and more.
_Last updated Dec 11, 2023_
![config-local-ollama-0-example.png](pixels/config-ollama-0-example.png)
## Quick Integration Guide
1. **Ensure Ollama API Server is Running**: Before starting, make sure your Ollama API server is up and running.
2. **Add Ollama as a Model Source**: In `big-AGI`, navigate to the **Models** section, select **Add a model source**, and choose **Ollama**.
3. **Enter Ollama Host URL**: Provide the Ollama Host URL where the API server is accessible (e.g., `http://localhost:11434`).
4. **Refresh Model List**: Once connected, refresh the list of available models to include the Ollama models.
5. **Start Using AI Personas**: Select an Ollama model and begin interacting with AI personas tailored to your needs.
1. **Ensure Ollama API Server is Running**: Follow the official instructions to get Ollama up and running on your machine
2. **Add Ollama as a Model Source**: In `big-AGI`, navigate to the **Models** section, select **Add a model source**, and choose **Ollama**
3. **Enter Ollama Host URL**: Provide the Ollama Host URL where the API server is accessible (e.g., `http://localhost:11434`)
4. **Refresh Model List**: Once connected, refresh the list of available models to include the Ollama models
> Optional: use the Ollama Admin interface to see which models are available and 'Pull' them onto your local machine. Note
that this operation will likely time out due to the Edge Functions timeout on the big-AGI server while pulling, and
you'll have to press the 'Pull' button again, until a green message appears.
5. **Chat with Ollama models**: select an Ollama model and begin chatting with AI personas
### Ollama: Installation and Setup
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "big-agi",
"version": "1.7.0",
"version": "1.7.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "big-agi",
"version": "1.7.0",
"version": "1.7.1",
"hasInstallScript": true,
"dependencies": {
"@dqbd/tiktoken": "^1.0.7",
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "big-agi",
"version": "1.7.0",
"version": "1.7.1",
"private": true,
"scripts": {
"dev": "next dev",
+4 -2
View File
@@ -67,9 +67,10 @@ export const NewsItems: NewsItem[] = [
],
},*/
{
versionCode: '1.7.0',
versionCode: '1.7.1',
versionName: 'Attachment Theory',
versionDate: new Date('2023-12-10T12:00:00Z'), // new Date().toISOString()
versionDate: new Date('2023-12-11T06:00:00Z'), // new Date().toISOString()
// versionDate: new Date('2023-12-10T12:00:00Z'), // 1.7.0
items: [
{ text: <>Redesigned <B href={RIssues + '/251'}>attachments system</B>: drag, paste, link, snap, images, text, pdfs</> },
{ text: <>Desktop <B href={RIssues + '/253'}>webcam access</B> for direct image capture (Labs option)</> },
@@ -79,6 +80,7 @@ export const NewsItems: NewsItem[] = [
{ text: <>{platformAwareKeystrokes('Ctrl+Shift+O')}: quick access to model options</> },
{ text: <>Optimized voice input and performance</> },
{ text: <>Latest Ollama and Oobabooga models</> },
{ text: <>1.7.1: Improved <B href={RIssues + '/270'}>Ollama chats</B></> },
],
},
{
+1
View File
@@ -46,6 +46,7 @@ export const appTheme = extendTheme({
text: {
icon: 'var(--joy-palette-neutral-700)', // <IconButton color='neutral' /> icon color
secondary: 'var(--joy-palette-neutral-800)', // increase contrast a bit
// tertiary: 'var(--joy-palette-neutral-700)', // increase contrast a bit
},
// popup [white] > surface [50] > level1 [100] > level2 [200] > level3 [300] > body [white -> 400]
background: {
@@ -3,54 +3,57 @@
* descriptions for the models.
* (nor does it reliably provide context window sizes) - TODO: open a bug upstream
*
* from: https://ollama.ai/library?sort=popular
* from: https://ollama.ai/library?sort=featured
*/
export const OLLAMA_BASE_MODELS: { [key: string]: { description: string, pulls: number, added?: string } } = {
'mistral': { description: 'The Mistral 7B model released by Mistral AI', pulls: 56100 },
'llama2': { description: 'The most popular model for general use.', pulls: 117400 },
'codellama': { description: 'A large language model that can use text prompts to generate and discuss code.', pulls: 61500 },
'llama2-uncensored': { description: 'Uncensored Llama 2 model by George Sung and Jarrad Hope.', pulls: 26800 },
'orca-mini': { description: 'A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware.', pulls: 23000 },
'vicuna': { description: 'General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.', pulls: 20600 },
'wizard-vicuna-uncensored': { description: 'Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford.', pulls: 12100 },
'phind-codellama': { description: 'Code generation model based on CodeLlama.', pulls: 9760 },
'wizardcoder': { description: 'Llama based code generation model focused on Python.', pulls: 9002 },
'mistral-openorca': { description: 'Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset.', pulls: 8671 },
'nous-hermes': { description: 'General use models based on Llama and Llama 2 from Nous Research.', pulls: 8478 },
'zephyr': { description: 'Zephyr beta is a fine-tuned 7B version of mistral that was trained on on a mix of publicly available, synthetic datasets.', pulls: 8142 },
'wizard-math': { description: 'Model focused on math and logic problems', pulls: 7426 },
'llama2-chinese': { description: 'Llama 2 based model fine tuned to improve Chinese dialogue ability.', pulls: 7035 },
'stable-beluga': { description: 'Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.', pulls: 6140 },
'falcon': { description: 'A large language model built by the Technology Innovation Institute (TII) for use in summarization, text generation, and chat bots.', pulls: 5865 },
'codeup': { description: 'Great code generation model based on Llama2.', pulls: 5534 },
'everythinglm': { description: 'Uncensored Llama2 based model with 16k context size.', pulls: 4696 },
'medllama2': { description: 'Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.', pulls: 4275 },
'wizardlm-uncensored': { description: 'Uncensored version of Wizard LM model.', pulls: 4227 },
'deepseek-coder': { description: 'DeepSeek Coder is trained from scratch on both 87% code and 13% natural language in English and Chinese. Each of the models are pre-trained on 2 trillion tokens.', pulls: 3663, added: '20231129' },
'wizard-vicuna': { description: 'Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj.', pulls: 3343 },
'orca2': { description: 'Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta\'s Llama 2 models. The model is designed to excel particularly in reasoning.', pulls: 3134, added: '20231129' },
'open-orca-platypus2': { description: 'Merge of the Open Orca OpenChat model and the Garage-bAInd Platypus 2 model. Designed for chat and code generation.', pulls: 3050 },
'starcoder': { description: 'StarCoder is a code generation model trained on 80+ programming languages.', pulls: 2981 },
'dolphin2.2-mistral': { description: 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.', pulls: 2636 },
'yarn-mistral': { description: 'An extension of Mistral to support a context of up to 128k tokens.', pulls: 2328 },
'openchat': { description: 'A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks.', pulls: 2281, added: '20231129' },
'openhermes2.5-mistral': { description: 'OpenHermes 2.5 Mistral 7B is a Mistral 7B fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.', pulls: 2101 },
'yi': { description: 'A high-performing, bilingual base model.', pulls: 1806 },
'samantha-mistral': { description: 'A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.', pulls: 1803 },
'yarn-llama2': { description: 'An extension of Llama 2 that supports a context of up to 128k tokens.', pulls: 1605 },
'sqlcoder': { description: 'SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks.', pulls: 1584 },
'openhermes2-mistral': { description: 'OpenHermes 2 Mistral is a 7B model fine-tuned on Mistral with 900,000 entries of primarily GPT-4 generated data from open datasets.', pulls: 1560 },
'neural-chat': { description: 'A fine-tuned model based on Mistral with good coverage of domain and language.', pulls: 1338, added: '20231129' },
'wizardlm': { description: 'General use 70 billion parameter model based on Llama 2.', pulls: 1253 },
'dolphin2.1-mistral': { description: 'An instruct-tuned model based on Mistral and trained on a dataset filtered to remove alignment and bias.', pulls: 1163 },
'mistrallite': { description: 'MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts.', pulls: 1099 },
'codebooga': { description: 'A high-performing code instruct model created by merging two existing code models.', pulls: 1042 },
'goliath': { description: 'A language model created by combining two fine-tuned Llama 2 70B models into one.', pulls: 728, added: '20231129' },
'xwinlm': { description: 'Conversational model based on Llama 2 that performs competitively on various benchmarks.', pulls: 593 },
'nexusraven': { description: 'Nexus Raven is a 13B instruction tuned model for function calling tasks.', pulls: 585 },
'alfred': { description: 'A robust conversational model designed to be used for both chat and instruct use cases.', pulls: 573, added: '20231129' },
'starling-lm': { description: 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.', pulls: 446, added: '20231129' },
'meditron': { description: 'Open-source medical large language model adapted from Llama 2 to the medical domain.', pulls: 100, added: '20231129' },
'deepseek-llm': { description: 'An advanced language model crafted with 2 trillion bilingual tokens.', pulls: 11, added: '20231129' },
'starling-lm': { description: 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.', pulls: 2353, added: '20231129' },
'neural-chat': { description: 'A fine-tuned model based on Mistral with good coverage of domain and language.', pulls: 3089, added: '20231129' },
'mistral': { description: 'The Mistral 7B model released by Mistral AI', pulls: 70300 },
'yi': { description: 'A high-performing, bilingual base model.', pulls: 2673 },
'llama2': { description: 'The most popular model for general use.', pulls: 141000 },
'codellama': { description: 'A large language model that can use text prompts to generate and discuss code.', pulls: 71400 },
'llama2-uncensored': { description: 'Uncensored Llama 2 model by George Sung and Jarrad Hope.', pulls: 30900 },
'orca-mini': { description: 'A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware.', pulls: 26000 },
'vicuna': { description: 'General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.', pulls: 21800 },
'wizard-vicuna-uncensored': { description: 'Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford.', pulls: 13700 },
'phind-codellama': { description: 'Code generation model based on CodeLlama.', pulls: 10600 },
'zephyr': { description: 'Zephyr beta is a fine-tuned 7B version of mistral that was trained on on a mix of publicly available, synthetic datasets.', pulls: 10200 },
'wizardcoder': { description: 'Llama based code generation model focused on Python.', pulls: 9895 },
'mistral-openorca': { description: 'Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset.', pulls: 9256 },
'nous-hermes': { description: 'General use models based on Llama and Llama 2 from Nous Research.', pulls: 8827 },
'wizard-math': { description: 'Model focused on math and logic problems', pulls: 7849 },
'llama2-chinese': { description: 'Llama 2 based model fine tuned to improve Chinese dialogue ability.', pulls: 7375 },
'deepseek-coder': { description: 'DeepSeek Coder is trained from scratch on both 87% code and 13% natural language in English and Chinese. Each of the models are pre-trained on 2 trillion tokens.', pulls: 7335, added: '20231129' },
'falcon': { description: 'A large language model built by the Technology Innovation Institute (TII) for use in summarization, text generation, and chat bots.', pulls: 6726 },
'stable-beluga': { description: 'Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.', pulls: 6272 },
'codeup': { description: 'Great code generation model based on Llama2.', pulls: 5978 },
'orca2': { description: 'Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta\'s Llama 2 models. The model is designed to excel particularly in reasoning.', pulls: 5854, added: '20231129' },
'everythinglm': { description: 'Uncensored Llama2 based model with 16k context size.', pulls: 5040 },
'medllama2': { description: 'Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.', pulls: 4648 },
'wizardlm-uncensored': { description: 'Uncensored version of Wizard LM model.', pulls: 4536 },
'dolphin2.2-mistral': { description: 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.', pulls: 3638 },
'starcoder': { description: 'StarCoder is a code generation model trained on 80+ programming languages.', pulls: 3638 },
'wizard-vicuna': { description: 'Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj.', pulls: 3485 },
'openchat': { description: 'A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks.', pulls: 3438, added: '20231129' },
'open-orca-platypus2': { description: 'Merge of the Open Orca OpenChat model and the Garage-bAInd Platypus 2 model. Designed for chat and code generation.', pulls: 3145 },
'openhermes2.5-mistral': { description: 'OpenHermes 2.5 Mistral 7B is a Mistral 7B fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.', pulls: 3023 },
'yarn-mistral': { description: 'An extension of Mistral to support a context of up to 128k tokens.', pulls: 2775 },
'samantha-mistral': { description: 'A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.', pulls: 2192 },
'sqlcoder': { description: 'SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks', pulls: 1973 },
'yarn-llama2': { description: 'An extension of Llama 2 that supports a context of up to 128k tokens.', pulls: 1915 },
'openhermes2-mistral': { description: 'OpenHermes 2 Mistral is a 7B model fine-tuned on Mistral with 900,000 entries of primarily GPT-4 generated data from open datasets.', pulls: 1690 },
'meditron': { description: 'Open-source medical large language model adapted from Llama 2 to the medical domain.', pulls: 1667, added: '20231129' },
'wizardlm': { description: 'General use 70 billion parameter model based on Llama 2.', pulls: 1379 },
'mistrallite': { description: 'MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts.', pulls: 1345 },
'deepseek-llm': { description: 'An advanced language model crafted with 2 trillion bilingual tokens.', pulls: 1318, added: '20231129' },
'dolphin2.1-mistral': { description: 'An instruct-tuned model based on Mistral and trained on a dataset filtered to remove alignment and bias.', pulls: 1302 },
'codebooga': { description: 'A high-performing code instruct model created by merging two existing code models.', pulls: 1254 },
'goliath': { description: 'A language model created by combining two fine-tuned Llama 2 70B models into one.', pulls: 946, added: '20231129' },
'stablelm-zephyr': { description: 'A lightweight chat model allowing accurate, and responsive output without requiring high-end hardware.', pulls: 945, added: '20231210' },
'nexusraven': { description: 'Nexus Raven is a 13B instruction tuned model for function calling tasks.', pulls: 860 },
'magicoder': { description: '🎩 Magicoder is a family of 7B parameter models trained on 75K synthetic instruction data using OSS-Instruct, a novel approach to enlightening LLMs with open-source code snippets.', pulls: 816, added: '20231210' },
'alfred': { description: 'A robust conversational model designed to be used for both chat and instruct use cases.', pulls: 804, added: '20231129' },
'xwinlm': { description: 'Conversational model based on Llama 2 that performs competitively on various benchmarks.', pulls: 706 },
};
export const OLLAMA_LAST_UPDATE: string = '20231129';
// export const OLLAMA_LAST_UPDATE: string = '20231210';
export const OLLAMA_PREV_UPDATE: string = '20231129';
@@ -11,12 +11,15 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fixupHost, openAIChatGenerateOutputSchema, OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { listModelsOutputSchema, ModelDescriptionSchema } from '../server.schemas';
import { OLLAMA_BASE_MODELS, OLLAMA_LAST_UPDATE } from './ollama.models';
import { wireOllamaGenerationSchema } from './ollama.wiretypes';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema } from './ollama.wiretypes';
// Default hosts
const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';
export const OLLAMA_PATH_CHAT = '/api/chat';
const OLLAMA_PATH_TAGS = '/api/tags';
const OLLAMA_PATH_SHOW = '/api/show';
// Mappers
@@ -34,7 +37,23 @@ export function ollamaAccess(access: OllamaAccessSchema, apiPath: string): { hea
}
export function ollamaChatCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean) {
export const ollamaChatCompletionPayload = (model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): WireOllamaChatCompletionInput => ({
model: model.id,
messages: history,
options: {
...(model.temperature && { temperature: model.temperature }),
},
// n: ...
// functions: ...
// function_call: ...
stream,
});
/* Unused: switched to the Chat endpoint (above). The implementation is left here for reference.
https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean) {
// if the first message is the system prompt, extract it
let systemPrompt: string | undefined = undefined;
@@ -62,7 +81,7 @@ export function ollamaChatCompletionPayload(model: OpenAIModelSchema, history: O
...(systemPrompt && { system: systemPrompt }),
stream,
};
}
}*/
async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = ollamaAccess(access, apiPath);
@@ -104,6 +123,7 @@ const listPullableOutputSchema = z.object({
label: z.string(),
tag: z.string(),
description: z.string(),
pulls: z.number(),
isNew: z.boolean(),
})),
});
@@ -122,7 +142,8 @@ export const llmOllamaRouter = createTRPCRouter({
label: capitalizeFirstLetter(model_id),
tag: 'latest',
description: model.description,
isNew: !!model.added && model.added >= OLLAMA_LAST_UPDATE,
pulls: model.pulls,
isNew: !!model.added && model.added >= OLLAMA_PREV_UPDATE,
})),
};
}),
@@ -160,6 +181,7 @@ export const llmOllamaRouter = createTRPCRouter({
throw new Error('Ollama delete issue: ' + deleteOutput);
}),
/* Ollama: List the Models available */
listModels: publicProcedure
.input(accessOnlySchema)
@@ -167,7 +189,7 @@ export const llmOllamaRouter = createTRPCRouter({
.query(async ({ input }) => {
// get the models
const wireModels = await ollamaGET(input.access, '/api/tags');
const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS);
const wireOllamaListModelsSchema = z.object({
models: z.array(z.object({
name: z.string(),
@@ -180,7 +202,7 @@ export const llmOllamaRouter = createTRPCRouter({
// retrieve info for each of the models (/api/show, post call, in parallel)
const detailedModels = await Promise.all(models.map(async model => {
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, '/api/show');
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW);
const wireOllamaModelInfoSchema = z.object({
license: z.string().optional(),
modelfile: z.string(),
@@ -221,12 +243,15 @@ export const llmOllamaRouter = createTRPCRouter({
.output(openAIChatGenerateOutputSchema)
.mutation(async ({ input: { access, history, model } }) => {
const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, false), '/api/generate');
const generation = wireOllamaGenerationSchema.parse(wireGeneration);
const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, false), OLLAMA_PATH_CHAT);
const generation = wireOllamaChunkedOutputSchema.parse(wireGeneration);
if (!generation.message?.content)
throw new Error('Ollama chat generation (non-stream) issue: ' + JSON.stringify(wireGeneration));
return {
role: 'assistant',
content: generation.response,
content: generation.message.content,
finish_reason: generation.done ? 'stop' : null,
};
}),
@@ -1,16 +1,69 @@
import { z } from 'zod';
export const wireOllamaGenerationSchema = z.object({
/**
* Chat Completion API - Request
* https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-chat-completion
*/
const wireOllamaChatCompletionInputSchema = z.object({
// required
model: z.string(),
messages: z.array(z.object({
role: z.enum(['assistant', 'system', 'user']),
content: z.string(),
})),
// optional
format: z.enum(['json']).optional(),
options: z.object({
// https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md
// Maximum number of tokens to predict when generating text.
num_predict: z.number().int().optional(),
// Sets the random number seed to use for generation
seed: z.number().int().optional(),
// The temperature of the model
temperature: z.number().positive().optional(),
// Reduces the probability of generating nonsense (Default: 40)
top_k: z.number().positive().optional(),
// Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text. (Default 0.9)
top_p: z.number().positive().optional(),
}).optional(),
template: z.string().optional(), // overrides what is defined in the Modelfile
stream: z.boolean().optional(), // default: true
// Future Improvements?
// n: z.number().int().optional(), // number of completions to generate
// functions: ...
// function_call: ...
});
export type WireOllamaChatCompletionInput = z.infer<typeof wireOllamaChatCompletionInputSchema>;
/**
* Chat Completion or Generation APIs - Streaming Response
*/
export const wireOllamaChunkedOutputSchema = z.object({
model: z.string(),
// created_at: z.string(), // commented because unused
response: z.string(),
// [Chat Completion] (exclusive with 'response')
message: z.object({
role: z.enum(['assistant' /*, 'system', 'user' Disabled on purpose, to validate the response */]),
content: z.string(),
}).optional(), // optional on the last message
// [Generation] (non-chat, exclusive with 'message')
//response: z.string().optional(),
done: z.boolean(),
// only on the last message
// context: z.array(z.number()),
// context: z.array(z.number()), // non-chat endpoint
// total_duration: z.number(),
// load_duration: z.number(),
// eval_duration: z.number(),
// prompt_eval_count: z.number(),
// prompt_eval_duration: z.number(),
// eval_count: z.number(),
});
// eval_duration: z.number(),
});
@@ -6,10 +6,10 @@ import { createEmptyReadableStream, debugGenerateCurlCommand, safeErrorString, S
import type { AnthropicWire } from '../anthropic/anthropic.wiretypes';
import type { OpenAIWire } from './openai.wiretypes';
import { OLLAMA_PATH_CHAT, ollamaAccess, ollamaAccessSchema, ollamaChatCompletionPayload } from '../ollama/ollama.router';
import { anthropicAccess, anthropicAccessSchema, anthropicChatCompletionPayload } from '../anthropic/anthropic.router';
import { ollamaAccess, ollamaAccessSchema, ollamaChatCompletionPayload } from '../ollama/ollama.router';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai.router';
import { wireOllamaGenerationSchema } from '../ollama/ollama.wiretypes';
import { wireOllamaChunkedOutputSchema } from '../ollama/ollama.wiretypes';
/**
@@ -59,10 +59,10 @@ export async function openaiStreamingRelayHandler(req: NextRequest): Promise<Res
break;
case 'ollama':
headersUrl = ollamaAccess(access, '/api/generate');
headersUrl = ollamaAccess(access, OLLAMA_PATH_CHAT);
body = ollamaChatCompletionPayload(model, history, true);
eventStreamFormat = 'json-nl';
vendorStreamParser = createOllamaStreamParser();
vendorStreamParser = createOllamaChatCompletionStreamParser();
break;
case 'azure':
@@ -135,30 +135,35 @@ function createAnthropicStreamParser(): AIStreamParser {
};
}
function createOllamaStreamParser(): AIStreamParser {
function createOllamaChatCompletionStreamParser(): AIStreamParser {
let hasBegun = false;
return (data: string) => {
let wireGeneration: any;
// parse the JSON chunk
let wireJsonChunk: any;
try {
wireGeneration = JSON.parse(data);
wireJsonChunk = JSON.parse(data);
} catch (error: any) {
// log the malformed data to the console, and rethrow to transmit as 'error'
console.log(`/api/llms/stream: Ollama parsing issue: ${error?.message || error}`, data);
throw error;
}
const generation = wireOllamaGenerationSchema.parse(wireGeneration);
let text = generation.response;
// validate chunk
const chunk = wireOllamaChunkedOutputSchema.parse(wireJsonChunk);
// process output
let text = chunk.message?.content || /*chunk.response ||*/ '';
// hack: prepend the model name to the first packet
if (!hasBegun) {
if (!hasBegun && chunk.model) {
hasBegun = true;
const firstPacket: ChatStreamFirstPacketSchema = { model: generation.model };
const firstPacket: ChatStreamFirstPacketSchema = { model: chunk.model };
text = JSON.stringify(firstPacket) + text;
}
return { text, close: generation.done };
return { text, close: chunk.done };
};
}
@@ -248,7 +253,8 @@ function createEventStreamTransformer(vendorTextParser: AIStreamParser, inputFor
if (close)
controller.terminate();
} catch (error: any) {
// console.log(`/api/llms/stream: parse issue: ${error?.message || error}`);
if (SERVER_DEBUG_WIRE)
console.log(' - E: parse issue:', event.data, error?.message || error);
controller.enqueue(textEncoder.encode(`[Stream Issue] ${dialectLabel}: ${safeErrorString(error) || 'Unknown stream parsing error'}`));
controller.terminate();
}
@@ -1,24 +1,29 @@
import * as React from 'react';
import { Box, Button, Chip, FormControl, Input, Option, Select, Stack, Typography } from '@mui/joy';
import { Box, Button, Chip, FormControl, IconButton, Input, Option, Select, Stack, Typography } from '@mui/joy';
import LaunchIcon from '@mui/icons-material/Launch';
import FormatListNumberedRtlIcon from '@mui/icons-material/FormatListNumberedRtl';
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
import { GoodModal } from '~/common/components/GoodModal';
import { GoodTooltip } from '~/common/components/GoodTooltip';
import { InlineError } from '~/common/components/InlineError';
import { Link } from '~/common/components/Link';
import { apiQuery } from '~/common/util/trpc.client';
import { settingsGap } from '~/common/app.theme';
import type { OllamaAccessSchema } from '../../transports/server/ollama/ollama.router';
import { InlineError } from '~/common/components/InlineError';
export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () => void }) {
export function OllamaAdministration(props: { access: OllamaAccessSchema, onClose: () => void }) {
// state
const [sortByPulls, setSortByPulls] = React.useState<boolean>(false);
const [modelName, setModelName] = React.useState<string | null>('llama2');
const [modelTag, setModelTag] = React.useState<string>('');
// external state
const { data: pullable } = apiQuery.llmOllama.adminListPullable.useQuery({ access: props.access }, {
const { data: pullableData } = apiQuery.llmOllama.adminListPullable.useQuery({ access: props.access }, {
staleTime: 1000 * 60,
refetchOnWindowFocus: false,
});
@@ -26,7 +31,11 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
const { isLoading: isDeleting, status: deleteStatus, error: deleteError, mutate: deleteMutate, reset: deleteReset } = apiQuery.llmOllama.adminDelete.useMutation();
// derived state
const pullModelDescription = pullable?.pullable.find(p => p.id === modelName)?.description ?? null;
let pullable = pullableData?.pullable || [];
if (sortByPulls)
pullable = pullable.toSorted((a, b) => b.pulls - a.pulls);
const pullModelDescription = pullable.find(p => p.id === modelName)?.description ?? null;
const handleModelPull = () => {
deleteReset();
@@ -38,6 +47,7 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
modelName && deleteMutate({ access: props.access, name: modelName + (modelTag ? ':' + modelTag : '') });
};
return (
<GoodModal title='Ollama Administration' dividers open onClose={props.onClose}>
@@ -47,25 +57,48 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
However we provide a way to pull models from the Ollama host, for convenience.
</Typography>
<Box sx={{ display: 'flex', gap: 1 }}>
<FormControl sx={{ flexGrow: 1 }}>
<Box sx={{ display: 'flex', flexFlow: 'row wrap', gap: 1 }}>
<FormControl sx={{ flexGrow: 1, flexBasis: 0.55 }}>
<FormLabelStart title='Name' />
<Select value={modelName || ''} onChange={(_event: any, value: string | null) => setModelName(value)}>
{pullable?.pullable.map(p =>
<Option key={p.id} value={p.id}>
{p.isNew === true && <Chip size='sm' variant='outlined'>New</Chip>} {p.label}
</Option>,
)}
</Select>
<Box sx={{ display: 'flex', gap: 1 }}>
<Select
value={modelName || ''}
onChange={(_event: any, value: string | null) => setModelName(value)}
sx={{ flexGrow: 1 }}
>
{pullable.map(p =>
<Option key={p.id} value={p.id}>
{p.isNew === true && <Chip size='sm' variant='outlined'>NEW</Chip>} {p.label}{sortByPulls && ` (${p.pulls.toLocaleString()})`}
</Option>,
)}
</Select>
<GoodTooltip title='Sort by Downloads'>
<IconButton
variant={sortByPulls ? 'solid' : 'outlined'}
onClick={() => setSortByPulls(!sortByPulls)}
>
<FormatListNumberedRtlIcon />
</IconButton>
</GoodTooltip>
</Box>
</FormControl>
<FormControl sx={{ flexGrow: 1 }}>
<FormControl sx={{ flexGrow: 1, flexBasis: 0.45 }}>
<FormLabelStart title='Tag' />
<Input
variant='outlined' placeholder='latest'
value={modelTag || ''} onChange={event => setModelTag(event.target.value)}
sx={{ minWidth: 100 }}
slotProps={{ input: { size: 10 } }} // halve the min width
/>
<Box sx={{ display: 'flex', gap: 1 }}>
<Input
variant='outlined' placeholder='latest'
value={modelTag || ''} onChange={event => setModelTag(event.target.value)}
sx={{ minWidth: 80, flexGrow: 1 }}
slotProps={{ input: { size: 10 } }} // halve the min width
/>
{!!modelName && (
<IconButton
component={Link} href={`https://ollama.ai/library/${modelName}`} target='_blank'
>
<LaunchIcon />
</IconButton>
)}
</Box>
</FormControl>
</Box>
@@ -85,7 +118,7 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
{pullModelDescription}
</Typography>
<Box sx={{ display: 'flex', gap: 1 }}>
<Box sx={{ display: 'flex', flexWrap: 1, gap: 1 }}>
<Button
variant='outlined'
color={deleteStatus === 'error' ? 'danger' : deleteStatus === 'success' ? 'success' : 'primary'}
+2 -2
View File
@@ -11,7 +11,7 @@ import { asValidURL } from '~/common/util/urlUtils';
import { DModelSourceId, useModelsStore, useSourceSetup } from '../../store-llms';
import { ModelVendorOllama } from './ollama.vendor';
import { OllamaAdmin } from './OllamaAdmin';
import { OllamaAdministration } from './OllamaAdministration';
import { modelDescriptionToDLLM } from '../openai/OpenAISourceSetup';
@@ -63,7 +63,7 @@ export function OllamaSourceSetup(props: { sourceId: DModelSourceId }) {
{isError && <InlineError error={error} />}
{adminOpen && <OllamaAdmin access={access} onClose={() => setAdminOpen(false)} />}
{adminOpen && <OllamaAdministration access={access} onClose={() => setAdminOpen(false)} />}
</>;
}