mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 13f502bd76 | |||
| 11055b12ca | |||
| d0ea96eec0 | |||
| 02eafc03f1 | |||
| 33d07a0313 | |||
| 763b852148 | |||
| d5b0617fd7 | |||
| e3ce83674c |
@@ -21,7 +21,7 @@ shows the current developments and future ideas.
|
||||
- Got a suggestion? [_Add your roadmap ideas_](https://github.com/enricoros/big-agi/issues/new?&template=roadmap-request.md)
|
||||
- Want to contribute? [_Pick up a task!_](https://github.com/users/enricoros/projects/4/views/4) - _easy_ to _pro_
|
||||
|
||||
### What's New in 1.7.0 · Dec 10, 2023 · Attachment Theory 🌟
|
||||
### What's New in 1.7.1 · Dec 11, 2023 · Attachment Theory 🌟
|
||||
|
||||
- **Attachments System Overhaul**: Drag, paste, link, snap, text, images, PDFs and more. [#251](https://github.com/enricoros/big-agi/issues/251)
|
||||
- **Desktop Webcam Capture**: Image capture now available as Labs feature. [#253](https://github.com/enricoros/big-agi/issues/253)
|
||||
@@ -31,6 +31,7 @@ shows the current developments and future ideas.
|
||||
- Optimized Voice Input and Performance
|
||||
- Latest Ollama and Oobabooga models
|
||||
- For developers: **Password Protection**: HTTP Basic Auth. [Learn How](https://github.com/enricoros/big-agi/blob/main/docs/deploy-authentication.md)
|
||||
- [1.7.1]: Improved Ollama chats. [#270](https://github.com/enricoros/big-agi/issues/270)
|
||||
|
||||
### What's New in 1.6.0 - Nov 28, 2023
|
||||
|
||||
|
||||
+2
-1
@@ -10,7 +10,7 @@ by release.
|
||||
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
|
||||
- milestone: [1.8.0](https://github.com/enricoros/big-agi/milestone/8)
|
||||
|
||||
### What's New in 1.7.0 · Dec 10, 2023 · Attachment Theory 🌟
|
||||
### What's New in 1.7.1 · Dec 11, 2023 · Attachment Theory 🌟
|
||||
|
||||
- **Attachments System Overhaul**: Drag, paste, link, snap, text, images, PDFs and more. [#251](https://github.com/enricoros/big-agi/issues/251)
|
||||
- **Desktop Webcam Capture**: Image capture now available as Labs feature. [#253](https://github.com/enricoros/big-agi/issues/253)
|
||||
@@ -20,6 +20,7 @@ by release.
|
||||
- Optimized Voice Input and Performance
|
||||
- Latest Ollama and Oobabooga models
|
||||
- For developers: **Password Protection**: HTTP Basic Auth. [Learn How](https://github.com/enricoros/big-agi/blob/main/docs/deploy-authentication.md)
|
||||
- [1.7.1]: Improved Ollama chats. [#270](https://github.com/enricoros/big-agi/issues/270)
|
||||
|
||||
### What's New in 1.6.0 - Nov 28, 2023 · Surf's Up
|
||||
|
||||
|
||||
+10
-5
@@ -5,15 +5,20 @@ This guide helps you connect [Ollama](https://ollama.ai) [models](https://ollama
|
||||
experience. The integration brings the popular big-AGI features to Ollama, including: voice chats,
|
||||
editing tools, models switching, personas, and more.
|
||||
|
||||
_Last updated Dec 11, 2023_
|
||||
|
||||

|
||||
|
||||
## Quick Integration Guide
|
||||
|
||||
1. **Ensure Ollama API Server is Running**: Before starting, make sure your Ollama API server is up and running.
|
||||
2. **Add Ollama as a Model Source**: In `big-AGI`, navigate to the **Models** section, select **Add a model source**, and choose **Ollama**.
|
||||
3. **Enter Ollama Host URL**: Provide the Ollama Host URL where the API server is accessible (e.g., `http://localhost:11434`).
|
||||
4. **Refresh Model List**: Once connected, refresh the list of available models to include the Ollama models.
|
||||
5. **Start Using AI Personas**: Select an Ollama model and begin interacting with AI personas tailored to your needs.
|
||||
1. **Ensure Ollama API Server is Running**: Follow the official instructions to get Ollama up and running on your machine
|
||||
2. **Add Ollama as a Model Source**: In `big-AGI`, navigate to the **Models** section, select **Add a model source**, and choose **Ollama**
|
||||
3. **Enter Ollama Host URL**: Provide the Ollama Host URL where the API server is accessible (e.g., `http://localhost:11434`)
|
||||
4. **Refresh Model List**: Once connected, refresh the list of available models to include the Ollama models
|
||||
> Optional: use the Ollama Admin interface to see which models are available and 'Pull' them to your local machine. Note
|
||||
that this operation will likely time out due to the Edge Functions timeout on the big-AGI server while pulling, and
|
||||
you'll have to press the 'Pull' button again, until a green message appears.
|
||||
5. **Chat with Ollama models**: select an Ollama model and begin chatting with AI personas
|
||||
|
||||
### Ollama: Installation and Setup
|
||||
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "big-agi",
|
||||
"version": "1.7.0",
|
||||
"version": "1.7.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "big-agi",
|
||||
"version": "1.7.0",
|
||||
"version": "1.7.1",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@dqbd/tiktoken": "^1.0.7",
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "big-agi",
|
||||
"version": "1.7.0",
|
||||
"version": "1.7.1",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
||||
@@ -67,9 +67,10 @@ export const NewsItems: NewsItem[] = [
|
||||
],
|
||||
},*/
|
||||
{
|
||||
versionCode: '1.7.0',
|
||||
versionCode: '1.7.1',
|
||||
versionName: 'Attachment Theory',
|
||||
versionDate: new Date('2023-12-10T12:00:00Z'), // new Date().toISOString()
|
||||
versionDate: new Date('2023-12-11T06:00:00Z'), // new Date().toISOString()
|
||||
// versionDate: new Date('2023-12-10T12:00:00Z'), // 1.7.0
|
||||
items: [
|
||||
{ text: <>Redesigned <B href={RIssues + '/251'}>attachments system</B>: drag, paste, link, snap, images, text, pdfs</> },
|
||||
{ text: <>Desktop <B href={RIssues + '/253'}>webcam access</B> for direct image capture (Labs option)</> },
|
||||
@@ -79,6 +80,7 @@ export const NewsItems: NewsItem[] = [
|
||||
{ text: <>{platformAwareKeystrokes('Ctrl+Shift+O')}: quick access to model options</> },
|
||||
{ text: <>Optimized voice input and performance</> },
|
||||
{ text: <>Latest Ollama and Oobabooga models</> },
|
||||
{ text: <>1.7.1: Improved <B href={RIssues + '/270'}>Ollama chats</B></> },
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -46,6 +46,7 @@ export const appTheme = extendTheme({
|
||||
text: {
|
||||
icon: 'var(--joy-palette-neutral-700)', // <IconButton color='neutral' /> icon color
|
||||
secondary: 'var(--joy-palette-neutral-800)', // increase contrast a bit
|
||||
// tertiary: 'var(--joy-palette-neutral-700)', // increase contrast a bit
|
||||
},
|
||||
// popup [white] > surface [50] > level1 [100] > level2 [200] > level3 [300] > body [white -> 400]
|
||||
background: {
|
||||
|
||||
@@ -3,54 +3,57 @@
|
||||
* descriptions for the models.
|
||||
* (nor does it reliably provide context window sizes) - TODO: open a bug upstream
|
||||
*
|
||||
* from: https://ollama.ai/library?sort=popular
|
||||
* from: https://ollama.ai/library?sort=featured
|
||||
*/
|
||||
export const OLLAMA_BASE_MODELS: { [key: string]: { description: string, pulls: number, added?: string } } = {
|
||||
'mistral': { description: 'The Mistral 7B model released by Mistral AI', pulls: 56100 },
|
||||
'llama2': { description: 'The most popular model for general use.', pulls: 117400 },
|
||||
'codellama': { description: 'A large language model that can use text prompts to generate and discuss code.', pulls: 61500 },
|
||||
'llama2-uncensored': { description: 'Uncensored Llama 2 model by George Sung and Jarrad Hope.', pulls: 26800 },
|
||||
'orca-mini': { description: 'A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware.', pulls: 23000 },
|
||||
'vicuna': { description: 'General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.', pulls: 20600 },
|
||||
'wizard-vicuna-uncensored': { description: 'Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford.', pulls: 12100 },
|
||||
'phind-codellama': { description: 'Code generation model based on CodeLlama.', pulls: 9760 },
|
||||
'wizardcoder': { description: 'Llama based code generation model focused on Python.', pulls: 9002 },
|
||||
'mistral-openorca': { description: 'Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset.', pulls: 8671 },
|
||||
'nous-hermes': { description: 'General use models based on Llama and Llama 2 from Nous Research.', pulls: 8478 },
|
||||
'zephyr': { description: 'Zephyr beta is a fine-tuned 7B version of mistral that was trained on on a mix of publicly available, synthetic datasets.', pulls: 8142 },
|
||||
'wizard-math': { description: 'Model focused on math and logic problems', pulls: 7426 },
|
||||
'llama2-chinese': { description: 'Llama 2 based model fine tuned to improve Chinese dialogue ability.', pulls: 7035 },
|
||||
'stable-beluga': { description: 'Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.', pulls: 6140 },
|
||||
'falcon': { description: 'A large language model built by the Technology Innovation Institute (TII) for use in summarization, text generation, and chat bots.', pulls: 5865 },
|
||||
'codeup': { description: 'Great code generation model based on Llama2.', pulls: 5534 },
|
||||
'everythinglm': { description: 'Uncensored Llama2 based model with 16k context size.', pulls: 4696 },
|
||||
'medllama2': { description: 'Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.', pulls: 4275 },
|
||||
'wizardlm-uncensored': { description: 'Uncensored version of Wizard LM model.', pulls: 4227 },
|
||||
'deepseek-coder': { description: 'DeepSeek Coder is trained from scratch on both 87% code and 13% natural language in English and Chinese. Each of the models are pre-trained on 2 trillion tokens.', pulls: 3663, added: '20231129' },
|
||||
'wizard-vicuna': { description: 'Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj.', pulls: 3343 },
|
||||
'orca2': { description: 'Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta\'s Llama 2 models. The model is designed to excel particularly in reasoning.', pulls: 3134, added: '20231129' },
|
||||
'open-orca-platypus2': { description: 'Merge of the Open Orca OpenChat model and the Garage-bAInd Platypus 2 model. Designed for chat and code generation.', pulls: 3050 },
|
||||
'starcoder': { description: 'StarCoder is a code generation model trained on 80+ programming languages.', pulls: 2981 },
|
||||
'dolphin2.2-mistral': { description: 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.', pulls: 2636 },
|
||||
'yarn-mistral': { description: 'An extension of Mistral to support a context of up to 128k tokens.', pulls: 2328 },
|
||||
'openchat': { description: 'A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks.', pulls: 2281, added: '20231129' },
|
||||
'openhermes2.5-mistral': { description: 'OpenHermes 2.5 Mistral 7B is a Mistral 7B fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.', pulls: 2101 },
|
||||
'yi': { description: 'A high-performing, bilingual base model.', pulls: 1806 },
|
||||
'samantha-mistral': { description: 'A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.', pulls: 1803 },
|
||||
'yarn-llama2': { description: 'An extension of Llama 2 that supports a context of up to 128k tokens.', pulls: 1605 },
|
||||
'sqlcoder': { description: 'SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks.', pulls: 1584 },
|
||||
'openhermes2-mistral': { description: 'OpenHermes 2 Mistral is a 7B model fine-tuned on Mistral with 900,000 entries of primarily GPT-4 generated data from open datasets.', pulls: 1560 },
|
||||
'neural-chat': { description: 'A fine-tuned model based on Mistral with good coverage of domain and language.', pulls: 1338, added: '20231129' },
|
||||
'wizardlm': { description: 'General use 70 billion parameter model based on Llama 2.', pulls: 1253 },
|
||||
'dolphin2.1-mistral': { description: 'An instruct-tuned model based on Mistral and trained on a dataset filtered to remove alignment and bias.', pulls: 1163 },
|
||||
'mistrallite': { description: 'MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts.', pulls: 1099 },
|
||||
'codebooga': { description: 'A high-performing code instruct model created by merging two existing code models.', pulls: 1042 },
|
||||
'goliath': { description: 'A language model created by combining two fine-tuned Llama 2 70B models into one.', pulls: 728, added: '20231129' },
|
||||
'xwinlm': { description: 'Conversational model based on Llama 2 that performs competitively on various benchmarks.', pulls: 593 },
|
||||
'nexusraven': { description: 'Nexus Raven is a 13B instruction tuned model for function calling tasks.', pulls: 585 },
|
||||
'alfred': { description: 'A robust conversational model designed to be used for both chat and instruct use cases.', pulls: 573, added: '20231129' },
|
||||
'starling-lm': { description: 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.', pulls: 446, added: '20231129' },
|
||||
'meditron': { description: 'Open-source medical large language model adapted from Llama 2 to the medical domain.', pulls: 100, added: '20231129' },
|
||||
'deepseek-llm': { description: 'An advanced language model crafted with 2 trillion bilingual tokens.', pulls: 11, added: '20231129' },
|
||||
'starling-lm': { description: 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.', pulls: 2353, added: '20231129' },
|
||||
'neural-chat': { description: 'A fine-tuned model based on Mistral with good coverage of domain and language.', pulls: 3089, added: '20231129' },
|
||||
'mistral': { description: 'The Mistral 7B model released by Mistral AI', pulls: 70300 },
|
||||
'yi': { description: 'A high-performing, bilingual base model.', pulls: 2673 },
|
||||
'llama2': { description: 'The most popular model for general use.', pulls: 141000 },
|
||||
'codellama': { description: 'A large language model that can use text prompts to generate and discuss code.', pulls: 71400 },
|
||||
'llama2-uncensored': { description: 'Uncensored Llama 2 model by George Sung and Jarrad Hope.', pulls: 30900 },
|
||||
'orca-mini': { description: 'A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware.', pulls: 26000 },
|
||||
'vicuna': { description: 'General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.', pulls: 21800 },
|
||||
'wizard-vicuna-uncensored': { description: 'Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford.', pulls: 13700 },
|
||||
'phind-codellama': { description: 'Code generation model based on CodeLlama.', pulls: 10600 },
|
||||
'zephyr': { description: 'Zephyr beta is a fine-tuned 7B version of mistral that was trained on on a mix of publicly available, synthetic datasets.', pulls: 10200 },
|
||||
'wizardcoder': { description: 'Llama based code generation model focused on Python.', pulls: 9895 },
|
||||
'mistral-openorca': { description: 'Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset.', pulls: 9256 },
|
||||
'nous-hermes': { description: 'General use models based on Llama and Llama 2 from Nous Research.', pulls: 8827 },
|
||||
'wizard-math': { description: 'Model focused on math and logic problems', pulls: 7849 },
|
||||
'llama2-chinese': { description: 'Llama 2 based model fine tuned to improve Chinese dialogue ability.', pulls: 7375 },
|
||||
'deepseek-coder': { description: 'DeepSeek Coder is trained from scratch on both 87% code and 13% natural language in English and Chinese. Each of the models are pre-trained on 2 trillion tokens.', pulls: 7335, added: '20231129' },
|
||||
'falcon': { description: 'A large language model built by the Technology Innovation Institute (TII) for use in summarization, text generation, and chat bots.', pulls: 6726 },
|
||||
'stable-beluga': { description: 'Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.', pulls: 6272 },
|
||||
'codeup': { description: 'Great code generation model based on Llama2.', pulls: 5978 },
|
||||
'orca2': { description: 'Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta\'s Llama 2 models. The model is designed to excel particularly in reasoning.', pulls: 5854, added: '20231129' },
|
||||
'everythinglm': { description: 'Uncensored Llama2 based model with 16k context size.', pulls: 5040 },
|
||||
'medllama2': { description: 'Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.', pulls: 4648 },
|
||||
'wizardlm-uncensored': { description: 'Uncensored version of Wizard LM model.', pulls: 4536 },
|
||||
'dolphin2.2-mistral': { description: 'An instruct-tuned model based on Mistral. Version 2.2 is fine-tuned for improved conversation and empathy.', pulls: 3638 },
|
||||
'starcoder': { description: 'StarCoder is a code generation model trained on 80+ programming languages.', pulls: 3638 },
|
||||
'wizard-vicuna': { description: 'Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj.', pulls: 3485 },
|
||||
'openchat': { description: 'A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks.', pulls: 3438, added: '20231129' },
|
||||
'open-orca-platypus2': { description: 'Merge of the Open Orca OpenChat model and the Garage-bAInd Platypus 2 model. Designed for chat and code generation.', pulls: 3145 },
|
||||
'openhermes2.5-mistral': { description: 'OpenHermes 2.5 Mistral 7B is a Mistral 7B fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.', pulls: 3023 },
|
||||
'yarn-mistral': { description: 'An extension of Mistral to support a context of up to 128k tokens.', pulls: 2775 },
|
||||
'samantha-mistral': { description: 'A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.', pulls: 2192 },
|
||||
'sqlcoder': { description: 'SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks', pulls: 1973 },
|
||||
'yarn-llama2': { description: 'An extension of Llama 2 that supports a context of up to 128k tokens.', pulls: 1915 },
|
||||
'openhermes2-mistral': { description: 'OpenHermes 2 Mistral is a 7B model fine-tuned on Mistral with 900,000 entries of primarily GPT-4 generated data from open datasets.', pulls: 1690 },
|
||||
'meditron': { description: 'Open-source medical large language model adapted from Llama 2 to the medical domain.', pulls: 1667, added: '20231129' },
|
||||
'wizardlm': { description: 'General use 70 billion parameter model based on Llama 2.', pulls: 1379 },
|
||||
'mistrallite': { description: 'MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts.', pulls: 1345 },
|
||||
'deepseek-llm': { description: 'An advanced language model crafted with 2 trillion bilingual tokens.', pulls: 1318, added: '20231129' },
|
||||
'dolphin2.1-mistral': { description: 'An instruct-tuned model based on Mistral and trained on a dataset filtered to remove alignment and bias.', pulls: 1302 },
|
||||
'codebooga': { description: 'A high-performing code instruct model created by merging two existing code models.', pulls: 1254 },
|
||||
'goliath': { description: 'A language model created by combining two fine-tuned Llama 2 70B models into one.', pulls: 946, added: '20231129' },
|
||||
'stablelm-zephyr': { description: 'A lightweight chat model allowing accurate, and responsive output without requiring high-end hardware.', pulls: 945, added: '20231210' },
|
||||
'nexusraven': { description: 'Nexus Raven is a 13B instruction tuned model for function calling tasks.', pulls: 860 },
|
||||
'magicoder': { description: '🎩 Magicoder is a family of 7B parameter models trained on 75K synthetic instruction data using OSS-Instruct, a novel approach to enlightening LLMs with open-source code snippets.', pulls: 816, added: '20231210' },
|
||||
'alfred': { description: 'A robust conversational model designed to be used for both chat and instruct use cases.', pulls: 804, added: '20231129' },
|
||||
'xwinlm': { description: 'Conversational model based on Llama 2 that performs competitively on various benchmarks.', pulls: 706 },
|
||||
};
|
||||
export const OLLAMA_LAST_UPDATE: string = '20231129';
|
||||
// export const OLLAMA_LAST_UPDATE: string = '20231210';
|
||||
export const OLLAMA_PREV_UPDATE: string = '20231129';
|
||||
@@ -11,12 +11,15 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
|
||||
import { fixupHost, openAIChatGenerateOutputSchema, OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
|
||||
import { listModelsOutputSchema, ModelDescriptionSchema } from '../server.schemas';
|
||||
|
||||
import { OLLAMA_BASE_MODELS, OLLAMA_LAST_UPDATE } from './ollama.models';
|
||||
import { wireOllamaGenerationSchema } from './ollama.wiretypes';
|
||||
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
|
||||
import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema } from './ollama.wiretypes';
|
||||
|
||||
|
||||
// Default hosts
|
||||
const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';
|
||||
export const OLLAMA_PATH_CHAT = '/api/chat';
|
||||
const OLLAMA_PATH_TAGS = '/api/tags';
|
||||
const OLLAMA_PATH_SHOW = '/api/show';
|
||||
|
||||
|
||||
// Mappers
|
||||
@@ -34,7 +37,23 @@ export function ollamaAccess(access: OllamaAccessSchema, apiPath: string): { hea
|
||||
|
||||
}
|
||||
|
||||
export function ollamaChatCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean) {
|
||||
|
||||
export const ollamaChatCompletionPayload = (model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): WireOllamaChatCompletionInput => ({
|
||||
model: model.id,
|
||||
messages: history,
|
||||
options: {
|
||||
...(model.temperature && { temperature: model.temperature }),
|
||||
},
|
||||
// n: ...
|
||||
// functions: ...
|
||||
// function_call: ...
|
||||
stream,
|
||||
});
|
||||
|
||||
|
||||
/* Unused: switched to the Chat endpoint (above). The implementation is left here for reference.
|
||||
https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
|
||||
export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean) {
|
||||
|
||||
// if the first message is the system prompt, extract it
|
||||
let systemPrompt: string | undefined = undefined;
|
||||
@@ -62,7 +81,7 @@ export function ollamaChatCompletionPayload(model: OpenAIModelSchema, history: O
|
||||
...(systemPrompt && { system: systemPrompt }),
|
||||
stream,
|
||||
};
|
||||
}
|
||||
}*/
|
||||
|
||||
async function ollamaGET<TOut extends object>(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
|
||||
const { headers, url } = ollamaAccess(access, apiPath);
|
||||
@@ -104,6 +123,7 @@ const listPullableOutputSchema = z.object({
|
||||
label: z.string(),
|
||||
tag: z.string(),
|
||||
description: z.string(),
|
||||
pulls: z.number(),
|
||||
isNew: z.boolean(),
|
||||
})),
|
||||
});
|
||||
@@ -122,7 +142,8 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
label: capitalizeFirstLetter(model_id),
|
||||
tag: 'latest',
|
||||
description: model.description,
|
||||
isNew: !!model.added && model.added >= OLLAMA_LAST_UPDATE,
|
||||
pulls: model.pulls,
|
||||
isNew: !!model.added && model.added >= OLLAMA_PREV_UPDATE,
|
||||
})),
|
||||
};
|
||||
}),
|
||||
@@ -160,6 +181,7 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
throw new Error('Ollama delete issue: ' + deleteOutput);
|
||||
}),
|
||||
|
||||
|
||||
/* Ollama: List the Models available */
|
||||
listModels: publicProcedure
|
||||
.input(accessOnlySchema)
|
||||
@@ -167,7 +189,7 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
.query(async ({ input }) => {
|
||||
|
||||
// get the models
|
||||
const wireModels = await ollamaGET(input.access, '/api/tags');
|
||||
const wireModels = await ollamaGET(input.access, OLLAMA_PATH_TAGS);
|
||||
const wireOllamaListModelsSchema = z.object({
|
||||
models: z.array(z.object({
|
||||
name: z.string(),
|
||||
@@ -180,7 +202,7 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
|
||||
// retrieve info for each of the models (/api/show, post call, in parallel)
|
||||
const detailedModels = await Promise.all(models.map(async model => {
|
||||
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, '/api/show');
|
||||
const wireModelInfo = await ollamaPOST(input.access, { 'name': model.name }, OLLAMA_PATH_SHOW);
|
||||
const wireOllamaModelInfoSchema = z.object({
|
||||
license: z.string().optional(),
|
||||
modelfile: z.string(),
|
||||
@@ -221,12 +243,15 @@ export const llmOllamaRouter = createTRPCRouter({
|
||||
.output(openAIChatGenerateOutputSchema)
|
||||
.mutation(async ({ input: { access, history, model } }) => {
|
||||
|
||||
const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, false), '/api/generate');
|
||||
const generation = wireOllamaGenerationSchema.parse(wireGeneration);
|
||||
const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, false), OLLAMA_PATH_CHAT);
|
||||
const generation = wireOllamaChunkedOutputSchema.parse(wireGeneration);
|
||||
|
||||
if (!generation.message?.content)
|
||||
throw new Error('Ollama chat generation (non-stream) issue: ' + JSON.stringify(wireGeneration));
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
content: generation.response,
|
||||
content: generation.message.content,
|
||||
finish_reason: generation.done ? 'stop' : null,
|
||||
};
|
||||
}),
|
||||
|
||||
@@ -1,16 +1,69 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
export const wireOllamaGenerationSchema = z.object({
|
||||
|
||||
/**
|
||||
* Chat Completion API - Request
|
||||
* https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-chat-completion
|
||||
*/
|
||||
const wireOllamaChatCompletionInputSchema = z.object({
|
||||
|
||||
// required
|
||||
model: z.string(),
|
||||
messages: z.array(z.object({
|
||||
role: z.enum(['assistant', 'system', 'user']),
|
||||
content: z.string(),
|
||||
})),
|
||||
|
||||
// optional
|
||||
format: z.enum(['json']).optional(),
|
||||
options: z.object({
|
||||
// https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md
|
||||
// Maximum number of tokens to predict when generating text.
|
||||
num_predict: z.number().int().optional(),
|
||||
// Sets the random number seed to use for generation
|
||||
seed: z.number().int().optional(),
|
||||
// The temperature of the model
|
||||
temperature: z.number().positive().optional(),
|
||||
// Reduces the probability of generating nonsense (Default: 40)
|
||||
top_k: z.number().positive().optional(),
|
||||
// Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text. (Default 0.9)
|
||||
top_p: z.number().positive().optional(),
|
||||
}).optional(),
|
||||
template: z.string().optional(), // overrides what is defined in the Modelfile
|
||||
stream: z.boolean().optional(), // default: true
|
||||
|
||||
// Future Improvements?
|
||||
// n: z.number().int().optional(), // number of completions to generate
|
||||
// functions: ...
|
||||
// function_call: ...
|
||||
});
|
||||
export type WireOllamaChatCompletionInput = z.infer<typeof wireOllamaChatCompletionInputSchema>;
|
||||
|
||||
|
||||
/**
|
||||
* Chat Completion or Generation APIs - Streaming Response
|
||||
*/
|
||||
export const wireOllamaChunkedOutputSchema = z.object({
|
||||
model: z.string(),
|
||||
// created_at: z.string(), // commented because unused
|
||||
response: z.string(),
|
||||
|
||||
// [Chat Completion] (exclusive with 'response')
|
||||
message: z.object({
|
||||
role: z.enum(['assistant' /*, 'system', 'user' Disabled on purpose, to validate the response */]),
|
||||
content: z.string(),
|
||||
}).optional(), // optional on the last message
|
||||
|
||||
// [Generation] (non-chat, exclusive with 'message')
|
||||
//response: z.string().optional(),
|
||||
|
||||
done: z.boolean(),
|
||||
|
||||
// only on the last message
|
||||
// context: z.array(z.number()),
|
||||
// context: z.array(z.number()), // non-chat endpoint
|
||||
// total_duration: z.number(),
|
||||
// load_duration: z.number(),
|
||||
// eval_duration: z.number(),
|
||||
// prompt_eval_count: z.number(),
|
||||
// prompt_eval_duration: z.number(),
|
||||
// eval_count: z.number(),
|
||||
});
|
||||
// eval_duration: z.number(),
|
||||
|
||||
});
|
||||
@@ -6,10 +6,10 @@ import { createEmptyReadableStream, debugGenerateCurlCommand, safeErrorString, S
|
||||
|
||||
import type { AnthropicWire } from '../anthropic/anthropic.wiretypes';
|
||||
import type { OpenAIWire } from './openai.wiretypes';
|
||||
import { OLLAMA_PATH_CHAT, ollamaAccess, ollamaAccessSchema, ollamaChatCompletionPayload } from '../ollama/ollama.router';
|
||||
import { anthropicAccess, anthropicAccessSchema, anthropicChatCompletionPayload } from '../anthropic/anthropic.router';
|
||||
import { ollamaAccess, ollamaAccessSchema, ollamaChatCompletionPayload } from '../ollama/ollama.router';
|
||||
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai.router';
|
||||
import { wireOllamaGenerationSchema } from '../ollama/ollama.wiretypes';
|
||||
import { wireOllamaChunkedOutputSchema } from '../ollama/ollama.wiretypes';
|
||||
|
||||
|
||||
/**
|
||||
@@ -59,10 +59,10 @@ export async function openaiStreamingRelayHandler(req: NextRequest): Promise<Res
|
||||
break;
|
||||
|
||||
case 'ollama':
|
||||
headersUrl = ollamaAccess(access, '/api/generate');
|
||||
headersUrl = ollamaAccess(access, OLLAMA_PATH_CHAT);
|
||||
body = ollamaChatCompletionPayload(model, history, true);
|
||||
eventStreamFormat = 'json-nl';
|
||||
vendorStreamParser = createOllamaStreamParser();
|
||||
vendorStreamParser = createOllamaChatCompletionStreamParser();
|
||||
break;
|
||||
|
||||
case 'azure':
|
||||
@@ -135,30 +135,35 @@ function createAnthropicStreamParser(): AIStreamParser {
|
||||
};
|
||||
}
|
||||
|
||||
function createOllamaStreamParser(): AIStreamParser {
|
||||
function createOllamaChatCompletionStreamParser(): AIStreamParser {
|
||||
let hasBegun = false;
|
||||
|
||||
return (data: string) => {
|
||||
|
||||
let wireGeneration: any;
|
||||
// parse the JSON chunk
|
||||
let wireJsonChunk: any;
|
||||
try {
|
||||
wireGeneration = JSON.parse(data);
|
||||
wireJsonChunk = JSON.parse(data);
|
||||
} catch (error: any) {
|
||||
// log the malformed data to the console, and rethrow to transmit as 'error'
|
||||
console.log(`/api/llms/stream: Ollama parsing issue: ${error?.message || error}`, data);
|
||||
throw error;
|
||||
}
|
||||
const generation = wireOllamaGenerationSchema.parse(wireGeneration);
|
||||
let text = generation.response;
|
||||
|
||||
// validate chunk
|
||||
const chunk = wireOllamaChunkedOutputSchema.parse(wireJsonChunk);
|
||||
|
||||
// process output
|
||||
let text = chunk.message?.content || /*chunk.response ||*/ '';
|
||||
|
||||
// hack: prepend the model name to the first packet
|
||||
if (!hasBegun) {
|
||||
if (!hasBegun && chunk.model) {
|
||||
hasBegun = true;
|
||||
const firstPacket: ChatStreamFirstPacketSchema = { model: generation.model };
|
||||
const firstPacket: ChatStreamFirstPacketSchema = { model: chunk.model };
|
||||
text = JSON.stringify(firstPacket) + text;
|
||||
}
|
||||
|
||||
return { text, close: generation.done };
|
||||
return { text, close: chunk.done };
|
||||
};
|
||||
}
|
||||
|
||||
@@ -248,7 +253,8 @@ function createEventStreamTransformer(vendorTextParser: AIStreamParser, inputFor
|
||||
if (close)
|
||||
controller.terminate();
|
||||
} catch (error: any) {
|
||||
// console.log(`/api/llms/stream: parse issue: ${error?.message || error}`);
|
||||
if (SERVER_DEBUG_WIRE)
|
||||
console.log(' - E: parse issue:', event.data, error?.message || error);
|
||||
controller.enqueue(textEncoder.encode(`[Stream Issue] ${dialectLabel}: ${safeErrorString(error) || 'Unknown stream parsing error'}`));
|
||||
controller.terminate();
|
||||
}
|
||||
|
||||
+55
-22
@@ -1,24 +1,29 @@
|
||||
import * as React from 'react';
|
||||
|
||||
import { Box, Button, Chip, FormControl, Input, Option, Select, Stack, Typography } from '@mui/joy';
|
||||
import { Box, Button, Chip, FormControl, IconButton, Input, Option, Select, Stack, Typography } from '@mui/joy';
|
||||
import LaunchIcon from '@mui/icons-material/Launch';
|
||||
import FormatListNumberedRtlIcon from '@mui/icons-material/FormatListNumberedRtl';
|
||||
|
||||
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
|
||||
import { GoodModal } from '~/common/components/GoodModal';
|
||||
import { GoodTooltip } from '~/common/components/GoodTooltip';
|
||||
import { InlineError } from '~/common/components/InlineError';
|
||||
import { Link } from '~/common/components/Link';
|
||||
import { apiQuery } from '~/common/util/trpc.client';
|
||||
import { settingsGap } from '~/common/app.theme';
|
||||
|
||||
import type { OllamaAccessSchema } from '../../transports/server/ollama/ollama.router';
|
||||
import { InlineError } from '~/common/components/InlineError';
|
||||
|
||||
|
||||
export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () => void }) {
|
||||
export function OllamaAdministration(props: { access: OllamaAccessSchema, onClose: () => void }) {
|
||||
|
||||
// state
|
||||
const [sortByPulls, setSortByPulls] = React.useState<boolean>(false);
|
||||
const [modelName, setModelName] = React.useState<string | null>('llama2');
|
||||
const [modelTag, setModelTag] = React.useState<string>('');
|
||||
|
||||
// external state
|
||||
const { data: pullable } = apiQuery.llmOllama.adminListPullable.useQuery({ access: props.access }, {
|
||||
const { data: pullableData } = apiQuery.llmOllama.adminListPullable.useQuery({ access: props.access }, {
|
||||
staleTime: 1000 * 60,
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
@@ -26,7 +31,11 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
|
||||
const { isLoading: isDeleting, status: deleteStatus, error: deleteError, mutate: deleteMutate, reset: deleteReset } = apiQuery.llmOllama.adminDelete.useMutation();
|
||||
|
||||
// derived state
|
||||
const pullModelDescription = pullable?.pullable.find(p => p.id === modelName)?.description ?? null;
|
||||
let pullable = pullableData?.pullable || [];
|
||||
if (sortByPulls)
|
||||
pullable = pullable.toSorted((a, b) => b.pulls - a.pulls);
|
||||
const pullModelDescription = pullable.find(p => p.id === modelName)?.description ?? null;
|
||||
|
||||
|
||||
const handleModelPull = () => {
|
||||
deleteReset();
|
||||
@@ -38,6 +47,7 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
|
||||
modelName && deleteMutate({ access: props.access, name: modelName + (modelTag ? ':' + modelTag : '') });
|
||||
};
|
||||
|
||||
|
||||
return (
|
||||
<GoodModal title='Ollama Administration' dividers open onClose={props.onClose}>
|
||||
|
||||
@@ -47,25 +57,48 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
|
||||
However we provide a way to pull models from the Ollama host, for convenience.
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ display: 'flex', gap: 1 }}>
|
||||
<FormControl sx={{ flexGrow: 1 }}>
|
||||
<Box sx={{ display: 'flex', flexFlow: 'row wrap', gap: 1 }}>
|
||||
<FormControl sx={{ flexGrow: 1, flexBasis: 0.55 }}>
|
||||
<FormLabelStart title='Name' />
|
||||
<Select value={modelName || ''} onChange={(_event: any, value: string | null) => setModelName(value)}>
|
||||
{pullable?.pullable.map(p =>
|
||||
<Option key={p.id} value={p.id}>
|
||||
{p.isNew === true && <Chip size='sm' variant='outlined'>New</Chip>} {p.label}
|
||||
</Option>,
|
||||
)}
|
||||
</Select>
|
||||
<Box sx={{ display: 'flex', gap: 1 }}>
|
||||
<Select
|
||||
value={modelName || ''}
|
||||
onChange={(_event: any, value: string | null) => setModelName(value)}
|
||||
sx={{ flexGrow: 1 }}
|
||||
>
|
||||
{pullable.map(p =>
|
||||
<Option key={p.id} value={p.id}>
|
||||
{p.isNew === true && <Chip size='sm' variant='outlined'>NEW</Chip>} {p.label}{sortByPulls && ` (${p.pulls.toLocaleString()})`}
|
||||
</Option>,
|
||||
)}
|
||||
</Select>
|
||||
<GoodTooltip title='Sort by Downloads'>
|
||||
<IconButton
|
||||
variant={sortByPulls ? 'solid' : 'outlined'}
|
||||
onClick={() => setSortByPulls(!sortByPulls)}
|
||||
>
|
||||
<FormatListNumberedRtlIcon />
|
||||
</IconButton>
|
||||
</GoodTooltip>
|
||||
</Box>
|
||||
</FormControl>
|
||||
<FormControl sx={{ flexGrow: 1 }}>
|
||||
<FormControl sx={{ flexGrow: 1, flexBasis: 0.45 }}>
|
||||
<FormLabelStart title='Tag' />
|
||||
<Input
|
||||
variant='outlined' placeholder='latest'
|
||||
value={modelTag || ''} onChange={event => setModelTag(event.target.value)}
|
||||
sx={{ minWidth: 100 }}
|
||||
slotProps={{ input: { size: 10 } }} // halve the min width
|
||||
/>
|
||||
<Box sx={{ display: 'flex', gap: 1 }}>
|
||||
<Input
|
||||
variant='outlined' placeholder='latest'
|
||||
value={modelTag || ''} onChange={event => setModelTag(event.target.value)}
|
||||
sx={{ minWidth: 80, flexGrow: 1 }}
|
||||
slotProps={{ input: { size: 10 } }} // halve the min width
|
||||
/>
|
||||
{!!modelName && (
|
||||
<IconButton
|
||||
component={Link} href={`https://ollama.ai/library/${modelName}`} target='_blank'
|
||||
>
|
||||
<LaunchIcon />
|
||||
</IconButton>
|
||||
)}
|
||||
</Box>
|
||||
</FormControl>
|
||||
</Box>
|
||||
|
||||
@@ -85,7 +118,7 @@ export function OllamaAdmin(props: { access: OllamaAccessSchema, onClose: () =>
|
||||
{pullModelDescription}
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ display: 'flex', gap: 1 }}>
|
||||
<Box sx={{ display: 'flex', flexWrap: 1, gap: 1 }}>
|
||||
<Button
|
||||
variant='outlined'
|
||||
color={deleteStatus === 'error' ? 'danger' : deleteStatus === 'success' ? 'success' : 'primary'}
|
||||
+2
-2
@@ -11,7 +11,7 @@ import { asValidURL } from '~/common/util/urlUtils';
|
||||
|
||||
import { DModelSourceId, useModelsStore, useSourceSetup } from '../../store-llms';
|
||||
import { ModelVendorOllama } from './ollama.vendor';
|
||||
import { OllamaAdmin } from './OllamaAdmin';
|
||||
import { OllamaAdministration } from './OllamaAdministration';
|
||||
import { modelDescriptionToDLLM } from '../openai/OpenAISourceSetup';
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ export function OllamaSourceSetup(props: { sourceId: DModelSourceId }) {
|
||||
|
||||
{isError && <InlineError error={error} />}
|
||||
|
||||
{adminOpen && <OllamaAdmin access={access} onClose={() => setAdminOpen(false)} />}
|
||||
{adminOpen && <OllamaAdministration access={access} onClose={() => setAdminOpen(false)} />}
|
||||
|
||||
</>;
|
||||
}
|
||||
Reference in New Issue
Block a user