Compare commits


7 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Enrico Ros | 7d6d7e619b | Anthropic: hardcode date | 2024-06-20 12:42:10 -07:00 |
| Enrico Ros | 34caa16e39 | 1.16.3: release | 2024-06-20 12:27:42 -07:00 |
| Enrico Ros | 976426dbd3 | Anthropic: support Claude 3.5 Sonnet | 2024-06-20 12:27:26 -07:00 |
| Enrico Ros | b4d8e39d56 | Gemini: acknowledge the new capability to createCachedContent. Fixes #565 | 2024-06-10 23:56:02 -07:00 |
| Enrico Ros | 11c41e7381 | Function call: increase debug verbosity | 2024-06-07 14:18:01 -07:00 |
| Enrico Ros | 358d8a54ff | Increase llms alignment before function calling. | 2024-06-07 14:11:36 -07:00 |
| Enrico Ros | 3c8fedce68 | Highlight issues with chatGenerateWithFunctions | 2024-06-07 12:38:21 -07:00 |
30 changed files with 265 additions and 140 deletions
+8 -10
@@ -21,17 +21,15 @@ Or fork & run on Vercel
[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
### What's New in 1.16.1...1.16.3 · Jun 20, 2024 (patch releases)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model
### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
+8 -10
@@ -10,17 +10,15 @@ by release.
- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
### What's New in 1.16.1...1.16.3 · Jun 20, 2024 (patch releases)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
- 1.16.3: Anthropic Claude 3.5 Sonnet model support
- 1.16.2: Improve web downloads, as text, markdown, or HTML
- 1.16.2: Proper support for Gemini models
- 1.16.2: Added the latest Mistral model
- 1.16.2: Tokenizer support for gpt-4o
- 1.16.2: Updates to Beam
- 1.16.1: Support for the new OpenAI GPT-4o 2024-05-13 model
### What's New in 1.16.0 · May 9, 2024 · Crystal Clear
+1 -1
@@ -277,7 +277,7 @@ export function AppChat() {
const conversation = getConversation(conversationId);
if (!conversation)
return;
const imaginedPrompt = await imaginePromptFromText(messageText) || 'An error sign.';
const imaginedPrompt = await imaginePromptFromText(messageText, conversationId) || 'An error sign.';
await handleExecuteAndOutcome('generate-image', conversationId, [
...conversation.messages,
createDMessage('user', imaginedPrompt),
+2 -2
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import type { DMessage } from '~/common/state/store-chats';
@@ -63,7 +63,7 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
export async function streamAssistantMessage(
llmId: DLLMId,
messagesHistory: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
autoSpeak: ChatAutoSpeakType,
+3 -2
@@ -61,7 +61,7 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.16.2',
versionCode: '1.16.3',
versionName: 'Crystal Clear',
versionDate: new Date('2024-06-07T05:00:00Z'),
// versionDate: new Date('2024-05-13T19:00:00Z'),
@@ -77,8 +77,9 @@ export const NewsItems: NewsItem[] = [
{ text: <>More: <B issue={517}>code soft-wrap</B>, selection toolbar, <B issue={507}>3x faster</B> on Apple silicon</>, issue: 507 },
{ text: <>Updated <B>Anthropic</B>*, <B>Groq</B>, <B>Ollama</B>, <B>OpenAI</B>*, <B>OpenRouter</B>*, and <B>Perplexity</B></> },
{ text: <>Developers: update LLMs data structures</>, dev: true },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B> (refresh your OpenAI models)</> },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B></> },
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
{ text: <>1.16.3: Support for <B href='https://www.anthropic.com/news/claude-3-5-sonnet'>Claude 3.5 Sonnet</B> (refresh your <B>Anthropic</B> models)</> },
],
},
{
+1 -1
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
// update this variable every time you want to broadcast a new version to clients
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.3
interface NewsState {
@@ -1,4 +1,4 @@
import { llmChatGenerateOrThrow, VChatFunctionIn } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatFunctionIn, VChatMessageIn } from '~/modules/llms/llm.client';
import { useModelsStore } from '~/modules/llms/store-llms';
import { useChatStore } from '~/common/state/store-chats';
@@ -83,13 +83,18 @@ export function autoSuggestions(conversationId: string, assistantMessageId: stri
// Follow-up: Auto-Diagrams
if (suggestDiagrams) {
llmChatGenerateOrThrow(funcLLMId, [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
], [suggestPlantUMLFn], 'draw_plantuml_diagram',
const instructions: VChatMessageIn[] = [
{ role: 'system', content: systemMessage.text },
{ role: 'user', content: userMessage.text },
{ role: 'assistant', content: assistantMessageText },
];
llmChatGenerateOrThrow(
funcLLMId,
instructions,
'chat-followup-diagram', conversationId,
[suggestPlantUMLFn], 'draw_plantuml_diagram',
).then(chatResponse => {
// cheap way to check if the function was supported
if (!('function_arguments' in chatResponse))
return;
+16 -14
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
import { useChatStore } from '~/common/state/store-chats';
@@ -34,21 +34,23 @@ export async function conversationAutoTitle(conversationId: string, forceReplace
try {
// LLM chat-generate call
const instructions: VChatMessageIn[] = [
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
];
const chatResponse = await llmChatGenerateOrThrow(
fastLLMId,
[
{ role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
{
role: 'user', content:
'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
'summarizes the conversation in as little as a couple of words.\n' +
'Only respond with the lowercase short title and nothing else.\n' +
'\n' +
'```\n' +
historyLines.join('\n') +
'```\n',
},
],
instructions,
'chat-ai-title', conversationId,
null, null,
);
@@ -1,5 +1,5 @@
import { getFastLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
const simpleImagineSystemPrompt =
@@ -10,14 +10,15 @@ Provide output as a lowercase prompt and nothing else.`;
/**
* Creates a caption for a drawing or photo given some description - used to elevate the quality of the imaging
*/
export async function imaginePromptFromText(messageText: string): Promise<string | null> {
export async function imaginePromptFromText(messageText: string, contextRef: string): Promise<string | null> {
const fastLLMId = getFastLLMId();
if (!fastLLMId) return null;
try {
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: simpleImagineSystemPrompt },
{ role: 'user', content: 'Write a prompt, based on the following input.\n\n```\n' + messageText.slice(0, 1000) + '\n```\n' },
], null, null);
];
const chatResponse = await llmChatGenerateOrThrow(fastLLMId, instructions, 'draw-expand-prompt', contextRef, null, null);
return chatResponse.content?.trim() ?? null;
} catch (error: any) {
console.error('imaginePromptFromText: fetch request error:', error);
+1 -1
@@ -132,7 +132,7 @@ export class Agent {
S.messages.push({ role: 'user', content: prompt });
let content: string;
try {
content = (await llmChatGenerateOrThrow(llmId, S.messages, null, null, 500)).content;
content = (await llmChatGenerateOrThrow(llmId, S.messages, 'chat-react-turn', null, null, null, 500)).content;
} catch (error: any) {
content = `Error in llmChatGenerateOrThrow: ${error}`;
}
+4 -3
@@ -1,5 +1,5 @@
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
// prompt to be tried when doing recursive summarization.
@@ -80,10 +80,11 @@ async function cleanUpContent(chunk: string, llmId: DLLMId, _ignored_was_targetW
const autoResponseTokensSize = contextTokens ? Math.floor(contextTokens * outputTokenShare) : null;
try {
const chatResponse = await llmChatGenerateOrThrow(llmId, [
const instructions: VChatMessageIn[] = [
{ role: 'system', content: cleanupPrompt },
{ role: 'user', content: chunk },
], null, null, autoResponseTokensSize ?? undefined);
];
const chatResponse = await llmChatGenerateOrThrow(llmId, instructions, 'chat-ai-summarize', null, null, null, autoResponseTokensSize ?? undefined);
return chatResponse?.content ?? '';
} catch (error: any) {
return '';
+2 -2
@@ -1,7 +1,7 @@
import * as React from 'react';
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
// set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
/**
* React hook to manage a chain of LLM transformations.
*/
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatStreamContextName, contextRef: VChatContextRef) {
// state
const [chain, setChain] = React.useState<ChainState | null>(null);
+2 -2
@@ -1,7 +1,7 @@
import * as React from 'react';
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
const abortControllerRef = React.useRef<AbortController | null>(null);
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatStreamContextName, contextRef: VChatContextRef) => {
setStreamError(null);
setPartialText(null);
setText(null);
+9 -12
@@ -2,7 +2,7 @@ import { sendGAEvent } from '@next/third-parties/google';
import { hasGoogleAnalytics } from '~/common/components/GoogleAnalytics';
import type { ModelDescriptionSchema } from './server/llm.server.types';
import type { GenerateContextNameSchema, ModelDescriptionSchema, StreamingContextNameSchema } from './server/llm.server.types';
import type { OpenAIWire } from './server/openai/openai.wiretypes';
import type { StreamingClientUpdate } from './vendors/unifiedStreamingClient';
import { DLLM, DLLMId, DModelSource, DModelSourceId, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, useModelsStore } from './store-llms';
@@ -21,14 +21,8 @@ export interface VChatMessageIn {
export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
export type VChatContextName =
| 'conversation'
| 'ai-diagram'
| 'ai-flattener'
| 'beam-scatter'
| 'beam-gather'
| 'call'
| 'persona-extract';
export type VChatStreamContextName = StreamingContextNameSchema;
export type VChatGenerateContextName = GenerateContextNameSchema;
export type VChatContextRef = string;
export interface VChatMessageOut {
@@ -122,7 +116,10 @@ function modelDescriptionToDLLMOpenAIOptions<TSourceSetup, TLLMOptions>(model: M
export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
contextName: VChatGenerateContextName,
contextRef: VChatContextRef | null,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
maxTokens?: number,
): Promise<VChatMessageOut | VChatMessageOrFunctionCallOut> {
@@ -146,14 +143,14 @@ export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = u
await new Promise(resolve => setTimeout(resolve, delay));
// execute via the vendor
return await vendor.rpcChatGenerateOrThrow(access, options, messages, functions, forceFunctionName, maxTokens);
return await vendor.rpcChatGenerateOrThrow(access, options, messages, contextName, contextRef, functions, forceFunctionName, maxTokens);
}
export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
contextName: VChatContextName,
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
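
For orientation, here is a minimal sketch (not part of the diff) of a call site adapted to the reordered `llmChatGenerateOrThrow` signature: the generate-context name and ref now sit between the messages and the function arguments. The helper name, prompt text, and ref value below are illustrative assumptions only; the context names come from the new generate-context enum.

```ts
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';

// Hypothetical helper (not in this changeset): summarize one chunk of text,
// tagging the request with the new 'chat-ai-summarize' generate context.
async function summarizeChunk(llmId: DLLMId, chunk: string, conversationId: string | null): Promise<string> {
  const instructions: VChatMessageIn[] = [
    { role: 'system', content: 'Summarize the user text in one short paragraph.' },
    { role: 'user', content: chunk },
  ];
  // argument order after this change:
  // llmId, messages, contextName, contextRef, functions, forceFunctionName, maxTokens?
  const chatResponse = await llmChatGenerateOrThrow(llmId, instructions, 'chat-ai-summarize', conversationId, null, null);
  return chatResponse?.content ?? '';
}
```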
@@ -1,4 +1,5 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { shallow } from 'zustand/shallow';
import { Box, Button, ButtonGroup, Divider, FormControl, Input, Switch, Tooltip, Typography } from '@mui/joy';
@@ -132,10 +133,10 @@ export function LLMOptionsModal(props: { id: DLLMId, onClose: () => void }) {
llm id: {llm.id}<br />
context tokens: <b>{llm.contextTokens ? llm.contextTokens.toLocaleString() : 'not provided'}</b>{` · `}
max output tokens: <b>{llm.maxOutputTokens ? llm.maxOutputTokens.toLocaleString() : 'not provided'}</b><br />
{!!llm.created && <>created: {(new Date(llm.created * 1000)).toLocaleString()}<br /></>}
{!!llm.created && <>created: <TimeAgo date={new Date(llm.created * 1000)} /><br /></>}
{/*· tags: {llm.tags.join(', ')}*/}
{!!llm.pricing && <>pricing: $<b>{llm.pricing.chatIn || '(unk) '}</b>/M in, $<b>{llm.pricing.chatOut || '(unk) '}</b>/M out<br /></>}
{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}
{/*{!!llm.benchmark && <>benchmark: <b>{llm.benchmark.cbaElo?.toLocaleString() || '(unk) '}</b> CBA Elo<br /></>}*/}
config: {JSON.stringify(llm.options)}
</Typography>
</Box>}
@@ -4,14 +4,64 @@ import { LLM_IF_OAI_Chat, LLM_IF_OAI_Vision } from '../../store-llms';
const roundTime = (date: string) => Math.round(new Date(date).getTime() / 1000);
export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: boolean })[] = [
// Claude 3.5 models - https://docs.anthropic.com/en/docs/about-claude/models
// {
// id: 'claude-3.5-opus', // ...
// label: 'Claude 3.5 Opus',
// created: roundTime(?),
// description: ?,
// contextWindow: 200000 ?, // Characters
// maxCompletionTokens: 4096 ?,
// trainingDataCutoff: ?,
// interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
// pricing: { chatIn: 15, chatOut: 75 },
// benchmark: {
// cbaElo: 1256, // Placeholder
// cbaMmlu: 86.8, // Placeholder
// },
// },
{
id: 'claude-3-5-sonnet-20240620',
label: 'Claude 3.5 Sonnet',
created: roundTime('2024-06-20 06:00'),
description: 'The most intelligent Claude model',
contextWindow: 200000, // Characters
maxCompletionTokens: 4096,
trainingDataCutoff: 'Apr 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: {
heCode: 92.0,
vqaMmmu: 68.3,
// TODO: Update with official benchmarks when available
cbaElo: 1256 - 1, // Placeholder
cbaMmlu: 86.8 - 1, // Placeholder
},
},
// {
// id: 'claude-3.5-haiku', // ...
// label: 'Claude 3.5 Haiku',
// created: roundTime(?),
// description: ?,
// contextWindow: 200000 ?, // Characters
// maxCompletionTokens: 4096 ?,
// trainingDataCutoff: ?,
// interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
// pricing: { chatIn: 0.25, chatOut: 1.25 },
// benchmark: {
// cbaElo: 1181, // Placeholder
// cbaMmlu: 75.2, // Placeholder
// },
// },
// Claude-3 models - https://docs.anthropic.com/claude/docs/models-overview#model-comparison
// Claude 3 models
{
id: 'claude-3-opus-20240229',
label: 'Claude 3 Opus',
created: roundTime('2024-02-29'),
description: 'Most powerful model for highly complex tasks',
description: 'Powerful model for complex tasks',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -23,19 +73,20 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
id: 'claude-3-sonnet-20240229',
label: 'Claude 3 Sonnet',
created: roundTime('2024-02-29'),
description: 'Ideal balance of intelligence and speed for enterprise workloads',
description: 'Balance of speed, cost, and performance',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision],
pricing: { chatIn: 3, chatOut: 15 },
benchmark: { cbaElo: 1203, cbaMmlu: 79 },
hidden: true,
},
{
id: 'claude-3-haiku-20240307',
label: 'Claude 3 Haiku',
created: roundTime('2024-03-07'),
description: 'Fastest and most compact model for near-instant responsiveness',
description: 'Fastest, most cost-effective model',
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
@@ -55,6 +106,7 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 8, chatOut: 24 },
benchmark: { cbaElo: 1119 },
hidden: true,
},
{
id: 'claude-2.0',
@@ -77,25 +129,26 @@ export const hardcodedAnthropicModels: ModelDescriptionSchema[] = [
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
pricing: { chatIn: 0.8, chatOut: 2.4 },
},
{
id: 'claude-instant-1.1',
label: 'Claude Instant 1.1',
created: roundTime('2023-03-14'),
description: 'Precise and fast',
contextWindow: 100000,
maxCompletionTokens: 2048,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
{
id: 'claude-1.3',
label: 'Claude 1.3',
created: roundTime('2023-03-14'),
description: 'Claude 1.3 is the latest version of Claude v1',
contextWindow: 100000,
maxCompletionTokens: 4096,
interfaces: [LLM_IF_OAI_Chat],
hidden: true,
},
// {
// id: 'claude-instant-1.1',
// label: 'Claude Instant 1.1',
// created: roundTime('2023-03-14'),
// description: 'Precise and fast',
// contextWindow: 100000,
// maxCompletionTokens: 2048,
// interfaces: [LLM_IF_OAI_Chat],
// hidden: true,
// },
// {
// id: 'claude-1.3',
// label: 'Claude 1.3',
// created: roundTime('2023-03-14'),
// description: 'Claude 1.3 is the latest version of Claude v1',
// contextWindow: 100000,
// maxCompletionTokens: 4096,
// interfaces: [LLM_IF_OAI_Chat],
// hidden: true,
// },
];
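
As a side note, several older Claude entries above gained `hidden: true`. A minimal sketch, assuming a plain client-side filter (the helper below is hypothetical, not part of this change), of how such flags are typically consumed:

```ts
import { hardcodedAnthropicModels } from './anthropic.models';

// Hypothetical helper: keep only entries not flagged `hidden`,
// e.g. Claude 3.5 Sonnet and Claude 3 Opus after this change.
export function listVisibleAnthropicModels() {
  return hardcodedAnthropicModels.filter(model => !model.hidden);
}
```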
@@ -8,7 +8,7 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
import { hardcodedAnthropicModels } from './anthropic.models';
@@ -158,7 +158,11 @@ const listModelsInputSchema = z.object({
const chatGenerateInputSchema = z.object({
access: anthropicAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
@@ -3,6 +3,10 @@ import type { ModelDescriptionSchema } from '../llm.server.types';
import { LLM_IF_OAI_Chat, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '../../store-llms';
// dev options
const DEV_DEBUG_GEMINI_MODELS = false;
// supported interfaces
const geminiChatInterfaces: GeminiModelSchema['supportedGenerationMethods'] = ['generateContent'];
@@ -175,6 +179,9 @@ export function geminiSortModels(a: ModelDescriptionSchema, b: ModelDescriptionS
export function geminiModelToModelDescription(geminiModel: GeminiModelSchema): ModelDescriptionSchema {
const { description, displayName, name: modelId, supportedGenerationMethods } = geminiModel;
if (DEV_DEBUG_GEMINI_MODELS)
console.log('geminiModelToModelDescription', geminiModel);
// find known manual mapping
const knownModel = _knownGeminiModels.find(m => m.id === modelId);
@@ -8,7 +8,7 @@ import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
import { fixupHost } from '~/common/util/urlUtils';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
@@ -120,8 +120,11 @@ const accessOnlySchema = z.object({
const chatGenerateInputSchema = z.object({
access: geminiAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
@@ -18,6 +18,7 @@ const geminiModelSchema = z.object({
inputTokenLimit: z.number().int().min(1),
outputTokenLimit: z.number().int().min(1),
supportedGenerationMethods: z.array(z.enum([
'createCachedContent', // appeared on 2024-06-10, see https://github.com/enricoros/big-AGI/issues/565
'countMessageTokens',
'countTextTokens',
'countTokens',
@@ -22,6 +22,9 @@ import type { OpenAIWire } from './openai/openai.wiretypes';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
import { llmsStreamingContextSchema } from './llm.server.types';
// configuration
const USER_SYMBOL_MAX_TOKENS = '🧱';
const USER_SYMBOL_PROMPT_BLOCKED = '🚫';
@@ -46,17 +49,14 @@ type MuxingFormat = 'sse' | 'json-nl';
*/
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const streamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
const chatStreamingInputSchema = z.object({
access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
model: openAIModelSchema,
history: openAIHistorySchema,
context: streamingContextSchema,
// NOTE: made it optional for now as we have some old requests without it
// 2024-07-07: remove .optional()
context: llmsStreamingContextSchema.optional(),
});
export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
@@ -12,6 +12,8 @@ const pricingSchema = z.object({
const benchmarkSchema = z.object({
cbaElo: z.number().optional(),
cbaMmlu: z.number().optional(),
heCode: z.number().optional(), // HumanEval, code, 0-shot
vqaMmmu: z.number().optional(), // Visual Question Answering, MMMU, 0-shot
});
// const rateLimitsSchema = z.object({
@@ -46,6 +48,25 @@ export const llmsListModelsOutputSchema = z.object({
});
// Chat Generation Input (some parts of)
const generateContextNameSchema = z.enum(['chat-ai-title', 'chat-ai-summarize', 'chat-followup-diagram', 'chat-react-turn', 'draw-expand-prompt']);
export type GenerateContextNameSchema = z.infer<typeof generateContextNameSchema>;
export const llmsGenerateContextSchema = z.object({
method: z.literal('chat-generate'),
name: generateContextNameSchema,
ref: z.string(),
});
const streamingContextNameSchema = z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']);
export type StreamingContextNameSchema = z.infer<typeof streamingContextNameSchema>;
export const llmsStreamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: streamingContextNameSchema,
ref: z.string(),
});
// (non-streaming) Chat Generation Output
export const llmsChatGenerateOutputSchema = z.object({
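
To make the new context plumbing concrete, a small usage sketch (not part of the diff) exercising the two schemas defined above; the `ref` values are hypothetical placeholders. Note that on the streaming router the `context` field is accepted as optional for now, so requests from older clients still validate.

```ts
import { llmsGenerateContextSchema, llmsStreamingContextSchema } from './llm.server.types';

// A non-streaming context, as the vendors now attach to chat-generate calls
const generateCtx = llmsGenerateContextSchema.parse({
  method: 'chat-generate',
  name: 'chat-ai-title',      // one of the five generate context names
  ref: 'conversation-1234',   // hypothetical ref
});

// A streaming context uses the other discriminator and name set
const streamingOk = llmsStreamingContextSchema.safeParse({
  method: 'chat-stream',
  name: 'conversation',
  ref: 'conversation-1234',
});
console.log(streamingOk.success); // true

// Mixing the two shapes fails: 'chat-ai-title' is not a streaming context name
const streamingBad = llmsStreamingContextSchema.safeParse({
  method: 'chat-stream',
  name: 'chat-ai-title',
  ref: 'conversation-1234',
});
console.log(streamingBad.success); // false
```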
@@ -11,7 +11,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema, wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
@@ -117,8 +117,11 @@ const adminPullModelSchema = z.object({
const chatGenerateInputSchema = z.object({
access: ollamaAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
// functions: openAIFunctionsSchema.optional(),
// forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const listPullableOutputSchema = z.object({
+24 -16
@@ -12,7 +12,7 @@ import { fixupHost } from '~/common/util/urlUtils';
import { OpenAIWire, WireOpenAICreateImageOutput, wireOpenAICreateImageOutputSchema, WireOpenAICreateImageRequest } from './openai.wiretypes';
import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, mistralModelsSort, mistralModelToModelDescription, oobaboogaModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models.data';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { llmsChatGenerateWithFunctionsOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
@@ -72,8 +72,11 @@ const listModelsInputSchema = z.object({
const chatGenerateWithFunctionsInputSchema = z.object({
access: openAIAccessSchema,
model: openAIModelSchema, history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
model: openAIModelSchema,
history: openAIHistorySchema,
functions: openAIFunctionsSchema.optional(),
forceFunctionName: z.string().optional(),
context: llmsGenerateContextSchema.optional(),
});
const createImagesInputSchema = z.object({
@@ -108,7 +111,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
if (access.dialect === 'azure') {
const azureModels = await openaiGET(access, `/openai/deployments?api-version=2023-03-15-preview`);
const azureModels = await openaiGETOrThrow(access, `/openai/deployments?api-version=2023-03-15-preview`);
const wireAzureListDeploymentsSchema = z.object({
data: z.array(z.object({
@@ -146,7 +149,7 @@ export const llmOpenAIRouter = createTRPCRouter({
// [non-Azure]: fetch openAI-style for all but Azure (will be then used in each dialect)
const openAIWireModelsResponse = await openaiGET<OpenAIWire.Models.Response>(access, '/v1/models');
const openAIWireModelsResponse = await openaiGETOrThrow<OpenAIWire.Models.Response>(access, '/v1/models');
// [Together] missing the .data property
if (access.dialect === 'togetherai')
@@ -267,17 +270,22 @@ export const llmOpenAIRouter = createTRPCRouter({
.output(llmsChatGenerateWithFunctionsOutputSchema)
.mutation(async ({ input }) => {
const { access, model, history, functions, forceFunctionName } = input;
const { access, model, history, functions, forceFunctionName, context } = input;
const isFunctionsCall = !!functions && functions.length > 0;
const completionsBody = openAIChatCompletionPayload(access.dialect, model, history, isFunctionsCall ? functions : null, forceFunctionName ?? null, 1, false);
const wireCompletions = await openaiPOST<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
const wireCompletions = await openaiPOSTOrThrow<OpenAIWire.ChatCompletion.Response, OpenAIWire.ChatCompletion.Request>(
access, model.id, completionsBody, '/v1/chat/completions',
);
// expect a single output
if (wireCompletions?.choices?.length !== 1)
throw new TRPCError({ code: 'INTERNAL_SERVER_ERROR', message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}` });
if (wireCompletions?.choices?.length !== 1) {
console.error(`[POST] llmOpenAI.chatGenerateWithFunctions: ${access.dialect}: ${context?.name || 'no context'}: unexpected output${forceFunctionName ? ` (fn: ${forceFunctionName})` : ''}:`, model.id, wireCompletions?.choices);
throw new TRPCError({
code: 'UNPROCESSABLE_CONTENT',
message: `[OpenAI Issue] Expected 1 completion, got ${wireCompletions?.choices?.length}`,
});
}
let { message, finish_reason } = wireCompletions.choices[0];
// LocalAI hack/workaround, until https://github.com/go-skynet/LocalAI/issues/788 is fixed
@@ -318,7 +326,7 @@ export const llmOpenAIRouter = createTRPCRouter({
delete requestBody.response_format;
// create 1 image (dall-e-3 won't support more than 1, so better transfer the burden to the client)
const wireOpenAICreateImageOutput = await openaiPOST<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
const wireOpenAICreateImageOutput = await openaiPOSTOrThrow<WireOpenAICreateImageOutput, WireOpenAICreateImageRequest>(
access, null, requestBody, '/v1/images/generations',
);
@@ -340,7 +348,7 @@ export const llmOpenAIRouter = createTRPCRouter({
.mutation(async ({ input: { access, text } }): Promise<OpenAIWire.Moderation.Response> => {
try {
return await openaiPOST<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
return await openaiPOSTOrThrow<OpenAIWire.Moderation.Response, OpenAIWire.Moderation.Request>(access, null, {
input: text,
model: 'text-moderation-latest',
}, '/v1/moderations');
@@ -361,7 +369,7 @@ export const llmOpenAIRouter = createTRPCRouter({
dialectLocalAI_galleryModelsAvailable: publicProcedure
.input(listModelsInputSchema)
.query(async ({ input: { access } }) => {
const wireLocalAIModelsAvailable = await openaiGET(access, '/models/available');
const wireLocalAIModelsAvailable = await openaiGETOrThrow(access, '/models/available');
return wireLocalAIModelsAvailableOutputSchema.parse(wireLocalAIModelsAvailable);
}),
@@ -374,7 +382,7 @@ export const llmOpenAIRouter = createTRPCRouter({
}))
.mutation(async ({ input: { access, galleryName, modelName } }) => {
const galleryModelId = `${galleryName}@${modelName}`;
const wireLocalAIModelApply = await openaiPOST(access, null, { id: galleryModelId }, '/models/apply');
const wireLocalAIModelApply = await openaiPOSTOrThrow(access, null, { id: galleryModelId }, '/models/apply');
return wilreLocalAIModelsApplyOutputSchema.parse(wireLocalAIModelApply);
}),
@@ -385,7 +393,7 @@ export const llmOpenAIRouter = createTRPCRouter({
jobId: z.string(),
}))
.query(async ({ input: { access, jobId } }) => {
const wireLocalAIModelsJobs = await openaiGET(access, `/models/jobs/${jobId}`);
const wireLocalAIModelsJobs = await openaiGETOrThrow(access, `/models/jobs/${jobId}`);
return wireLocalAIModelsListOutputSchema.parse(wireLocalAIModelsJobs);
}),
@@ -623,12 +631,12 @@ export function openAIChatCompletionPayload(dialect: OpenAIDialects, model: Open
};
}
async function openaiGET<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
async function openaiGETOrThrow<TOut extends object>(access: OpenAIAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = openAIAccess(access, null, apiPath);
return await fetchJsonOrTRPCError<TOut>(url, 'GET', headers, undefined, `OpenAI/${access.dialect}`);
}
async function openaiPOST<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
async function openaiPOSTOrThrow<TOut extends object, TPostBody extends object>(access: OpenAIAccessSchema, modelRefId: string | null, body: TPostBody, apiPath: string /*, signal?: AbortSignal*/): Promise<TOut> {
const { headers, url } = openAIAccess(access, modelRefId, apiPath);
return await fetchJsonOrTRPCError<TOut, TPostBody>(url, 'POST', headers, body, `OpenAI/${access.dialect}`);
}
+3 -2
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
import type { ModelDescriptionSchema } from '../server/llm.server.types';
import type { ModelVendorId } from './vendors.registry';
import type { StreamingClientUpdate } from './unifiedStreamingClient';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
import type { VChatContextRef, VChatFunctionIn, VChatGenerateContextName, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut, VChatStreamContextName } from '../llm.client';
export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown, TDLLM = DLLM<TSourceSetup, TLLMOptions>> {
@@ -44,6 +44,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
access: TAccess,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatGenerateContextName, contextRef: VChatContextRef | null,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
maxTokens?: number,
) => Promise<VChatMessageOut | VChatMessageOrFunctionCallOut>;
@@ -53,7 +54,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contexRef: VChatContextRef,
contextName: VChatStreamContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { AnthropicAccessSchema } from '../../server/anthropic/anthropic.router';
import type { IModelVendor } from '../IModelVendor';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -47,7 +47,7 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmAnthropic.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Anthropic does not support functions');
@@ -61,6 +61,11 @@ export const ModelVendorAnthropic: IModelVendor<SourceSetupAnthropic, AnthropicA
maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Anthropic Chat Generate Error';
+8 -3
@@ -1,10 +1,10 @@
import { GeminiIcon } from '~/common/components/icons/vendors/GeminiIcon';
import { apiAsync } from '~/common/util/trpc.client';
import { apiAsync } from '~/common/util/trpc.client';
import type { GeminiAccessSchema } from '../../server/gemini/gemini.router';
import type { GeminiBlockSafetyLevel } from '../../server/gemini/gemini.wiretypes';
import type { IModelVendor } from '../IModelVendor';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE } from '../openai/openai.vendor';
@@ -60,7 +60,7 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmGemini.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Gemini does not support functions');
@@ -74,6 +74,11 @@ export const ModelVendorGemini: IModelVendor<SourceSetupGemini, GeminiAccessSche
maxTokens: maxTokens || maxOutputTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Gemini Chat Generate Error';
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { IModelVendor } from '../IModelVendor';
import type { OllamaAccessSchema } from '../../server/ollama/ollama.router';
import type { VChatMessageOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -42,7 +42,7 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOllama.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
if (functions?.length || forceFunctionName)
throw new Error('Ollama does not support functions');
@@ -56,6 +56,11 @@ export const ModelVendorOllama: IModelVendor<SourceSetupOllama, OllamaAccessSche
maxTokens: maxTokens || llmResponseTokens || FALLBACK_LLM_RESPONSE_TOKENS,
},
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'Ollama Chat Generate Error';
+7 -2
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
import type { IModelVendor } from '../IModelVendor';
import type { OpenAIAccessSchema } from '../../server/openai/openai.router';
import type { VChatMessageOrFunctionCallOut } from '../../llm.client';
import type { VChatContextRef, VChatGenerateContextName, VChatMessageOrFunctionCallOut } from '../../llm.client';
import { unifiedStreamingClient } from '../unifiedStreamingClient';
import { OpenAILLMOptions } from './OpenAILLMOptions';
@@ -60,7 +60,7 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOpenAI.listModels.query({ access }),
// Chat Generate (non-streaming) with Functions
rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
try {
return await apiAsync.llmOpenAI.chatGenerateWithFunctions.mutate({
@@ -73,6 +73,11 @@ export const ModelVendorOpenAI: IModelVendor<SourceSetupOpenAI, OpenAIAccessSche
functions: functions ?? undefined,
forceFunctionName: forceFunctionName ?? undefined,
history: messages,
context: contextRef ? {
method: 'chat-generate',
name: contextName,
ref: contextRef,
} : undefined,
}) as VChatMessageOrFunctionCallOut;
} catch (error: any) {
const errorMessage = error?.message || error?.toString() || 'OpenAI Chat Generate Error';
+2 -2
@@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers';
import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming';
import type { DLLMId } from '../store-llms';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatStreamContextName } from '../llm.client';
import type { OpenAIAccessSchema } from '../server/openai/openai.router';
import type { OpenAIWire } from '../server/openai/openai.wiretypes';
@@ -29,7 +29,7 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contextRef: VChatContextRef,
contextName: VChatStreamContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,