Merge branch 'refs/heads/main-stable'

# Conflicts:
#	package-lock.json
#	package.json
#	src/common/util/token-counter.ts
This commit is contained in:
Enrico Ros
2024-06-06 22:11:43 -07:00
18 changed files with 91 additions and 37 deletions
+9 -1
View File
@@ -21,7 +21,15 @@ Or fork & run on Vercel
[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)
### What's New in 1.16.1 · May 13, 2024 (minor release, models support)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
+9 -1
View File
@@ -10,7 +10,15 @@ by release.
- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.16.1 · May 13, 2024 (minor release, models support)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
+4 -4
View File
@@ -1,5 +1,5 @@
import * as React from 'react';
import { shallow } from 'zustand/shallow';
import { useShallow } from 'zustand/react/shallow';
import { Box, Card, ListDivider, ListItemDecorator, MenuItem, Switch, Typography } from '@mui/joy';
import ArrowBackIcon from '@mui/icons-material/ArrowBack';
@@ -99,7 +99,7 @@ export function Telephone(props: {
// external state
const { chatLLMId, chatLLMDropdown } = useChatLLMDropdown();
const { chatTitle, reMessages } = useChatStore(state => {
const { chatTitle, reMessages } = useChatStore(useShallow(state => {
const conversation = props.callIntent.conversationId
? state.conversations.find(conversation => conversation.id === props.callIntent.conversationId) ?? null
: null;
@@ -107,7 +107,7 @@ export function Telephone(props: {
chatTitle: conversation ? conversationTitle(conversation) : null,
reMessages: conversation ? conversation.messages : null,
};
}, shallow);
}));
const persona = SystemPurposes[props.callIntent.personaId as SystemPurposeId] ?? undefined;
const personaCallStarters = persona?.call?.starters ?? undefined;
const personaVoiceId = overridePersonaVoice ? undefined : (persona?.voices?.elevenLabs?.voiceId ?? undefined);
@@ -225,7 +225,7 @@ export function Telephone(props: {
let finalText = '';
let error: any | null = null;
setPersonaTextInterim('💭...');
llmStreamingChatGenerate(chatLLMId, callPrompt, null, null, responseAbortController.current.signal, ({ textSoFar }) => {
llmStreamingChatGenerate(chatLLMId, callPrompt, 'call', callMessages[0].id, null, null, responseAbortController.current.signal, ({ textSoFar }) => {
const text = textSoFar?.trim();
if (text) {
finalText = text;
+6 -2
View File
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import type { DMessage } from '~/common/state/store-chats';
@@ -34,6 +34,8 @@ export async function runAssistantUpdatingState(conversationId: string, history:
const messageStatus = await streamAssistantMessage(
assistantLlmId,
history.map((m): VChatMessageIn => ({ role: m.role, content: m.text })),
'conversation',
conversationId,
parallelViewCount,
autoSpeak,
(update) => cHandler.messageEdit(assistantMessageId, update, false),
@@ -61,6 +63,8 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
export async function streamAssistantMessage(
llmId: DLLMId,
messagesHistory: VChatMessageIn[],
contextName: VChatContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
autoSpeak: ChatAutoSpeakType,
editMessage: (update: Partial<DMessage>) => void,
@@ -92,7 +96,7 @@ export async function streamAssistantMessage(
const incrementalAnswer: Partial<DMessage> = { text: '' };
try {
await llmStreamingChatGenerate(llmId, messagesHistory, null, null, abortSignal, (update: StreamingClientUpdate) => {
await llmStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, (update: StreamingClientUpdate) => {
const textSoFar = update.textSoFar;
// grow the incremental message
+4 -2
View File
@@ -61,9 +61,10 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.16.1',
versionCode: '1.16.2',
versionName: 'Crystal Clear',
versionDate: new Date('2024-05-13T19:00:00Z'),
versionDate: new Date('2024-06-07T05:00:00Z'),
// versionDate: new Date('2024-05-13T19:00:00Z'),
// versionDate: new Date('2024-05-09T00:00:00Z'),
versionCoverImage: coverV116,
items: [
@@ -77,6 +78,7 @@ export const NewsItems: NewsItem[] = [
{ text: <>Updated <B>Anthropic</B>*, <B>Groq</B>, <B>Ollama</B>, <B>OpenAI</B>*, <B>OpenRouter</B>*, and <B>Perplexity</B></> },
{ text: <>Developers: update LLMs data structures</>, dev: true },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B> (refresh your OpenAI models)</> },
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
],
},
{
+1 -1
View File
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
// update this variable every time you want to broadcast a new version to clients
export const incrementalNewsVersion: number = 16.1;
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2
interface NewsState {
+7 -3
View File
@@ -1,4 +1,5 @@
import * as React from 'react';
import { v4 as uuidv4 } from 'uuid';
import { Alert, Box, Button, Card, CardContent, CircularProgress, Divider, FormLabel, Grid, IconButton, LinearProgress, Tab, tabClasses, TabList, TabPanel, Tabs, Typography } from '@mui/joy';
import AddIcon from '@mui/icons-material/Add';
@@ -102,8 +103,11 @@ export function Creator(props: { display: boolean }) {
strings: editedInstructions, stringEditors: instructionEditors,
} = useFormEditTextArray(Prompts, PromptTitles);
const creationChainSteps = React.useMemo(() => {
return createChain(editedInstructions, PromptTitles);
const { steps: creationChainSteps, id: chainId } = React.useMemo(() => {
return {
steps: createChain(editedInstructions, PromptTitles),
id: uuidv4(),
};
}, [editedInstructions]);
const llmLabel = personaLlm?.label || undefined;
@@ -122,7 +126,7 @@ export function Creator(props: { display: boolean }) {
chainError,
userCancelChain,
restartChain,
} = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? undefined, savePersona);
} = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? undefined, savePersona, 'persona-extract', chainId);
// Reset the relevant state when the selected tab changes
+3 -3
View File
@@ -68,7 +68,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
const [diagramLlm, llmComponent] = useFormRadioLlmType('Generator', 'chat');
// derived state
const { conversationId, text: subject } = props.config;
const { conversationId, messageId, text: subject } = props.config;
const diagramLlmId = diagramLlm?.id;
@@ -98,7 +98,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
const diagramPrompt = bigDiagramPrompt(diagramType, diagramLanguage, systemMessage.text, subject, customInstruction);
try {
await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, null, null, stepAbortController.signal,
await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, 'ai-diagram', messageId, null, null, stepAbortController.signal,
({ textSoFar }) => textSoFar && setDiagramCode(diagramCode = textSoFar),
);
} catch (error: any) {
@@ -109,7 +109,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
setAbortController(null);
}
}, [abortController, conversationId, diagramLanguage, diagramLlm, diagramType, subject, customInstruction]);
}, [abortController, conversationId, customInstruction, diagramLanguage, diagramLlm, diagramType, messageId, subject]);
// [Effect] Auto-abort on unmount
+1 -1
View File
@@ -117,7 +117,7 @@ export function FlattenerModal(props: {
await startStreaming(llm.id, [
{ role: 'system', content: flattenProfile.systemPrompt },
{ role: 'user', content: encodeConversationAsUserMessage(flattenProfile.userPrompt, messages) },
]);
], 'ai-flattener', messages[0].id);
}, [llm, props.conversationId, startStreaming]);
+4 -4
View File
@@ -1,7 +1,7 @@
import * as React from 'react';
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
// set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
/**
* React hook to manage a chain of LLM transformations.
*/
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess?: (output: string, input: string) => void) {
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
// state
const [chain, setChain] = React.useState<ChainState | null>(null);
@@ -114,7 +114,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch
setChainStepInterimText(null);
// LLM call (streaming, cancelable)
llmStreamingChatGenerate(llmId, llmChatInput, null, null, stepAbortController.signal,
llmStreamingChatGenerate(llmId, llmChatInput, contextName, contextRef, null, null, stepAbortController.signal,
({ textSoFar }) => {
textSoFar && setChainStepInterimText(interimText = textSoFar);
})
@@ -141,7 +141,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch
stepAbortController.abort('step aborted');
_chainAbortController.signal.removeEventListener('abort', globalToStepListener);
};
}, [chain, llmId, onSuccess]);
}, [chain, contextRef, contextName, llmId, onSuccess]);
return {
+3 -3
View File
@@ -1,7 +1,7 @@
import * as React from 'react';
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
const abortControllerRef = React.useRef<AbortController | null>(null);
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[]) => {
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
setStreamError(null);
setPartialText(null);
setText(null);
@@ -24,7 +24,7 @@ export function useStreamChatText() {
try {
let lastText = '';
await llmStreamingChatGenerate(llmId, prompt, null, null, abortControllerRef.current.signal, ({ textSoFar }) => {
await llmStreamingChatGenerate(llmId, prompt, contextName, contextRef, null, null, abortControllerRef.current.signal, ({ textSoFar }) => {
if (textSoFar) {
lastText = textSoFar;
setPartialText(lastText);
@@ -96,7 +96,7 @@ export async function executeChatGenerate(_i: ChatGenerateInstruction, inputs: E
};
// LLM Streaming generation
return streamAssistantMessage(inputs.llmId, history, getUXLabsHighPerformance() ? 0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal)
return streamAssistantMessage(inputs.llmId, history, 'beam-gather', inputs.contextRef, getUXLabsHighPerformance() ? 0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal)
.then((status) => {
// re-throw errors, as streamAssistantMessage catches internally
if (status.outcome === 'aborted') {
@@ -23,6 +23,7 @@ export interface ExecutionInputState {
readonly chatMessages: DMessage[];
readonly rayMessages: DMessage[];
readonly llmId: DLLMId;
readonly contextRef: string; // not useful
// interaction
readonly chainAbortController: AbortController;
readonly updateProgressComponent: (component: React.ReactNode) => void;
@@ -67,6 +68,7 @@ export function gatherStartFusion(
chatMessages: chatMessages,
rayMessages: rayMessages,
llmId: initialFusion.llmId,
contextRef: initialFusion.fusionId,
// interaction
chainAbortController: new AbortController(),
updateProgressComponent: (component: React.ReactNode) => onUpdateBFusion({ fusingProgressComponent: component }),
+1 -1
View File
@@ -67,7 +67,7 @@ function rayScatterStart(ray: BRay, llmId: DLLMId | null, inputHistory: DMessage
// stream the assistant's messages
const messagesHistory: VChatMessageIn[] = inputHistory.map(({ role, text }) => ({ role, content: text }));
streamAssistantMessage(llmId, messagesHistory, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal)
streamAssistantMessage(llmId, messagesHistory, 'beam-scatter', ray.rayId, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal)
.then((status) => {
_rayUpdate(ray.rayId, {
status: (status.outcome === 'success') ? 'success'
+13 -1
View File
@@ -21,6 +21,16 @@ export interface VChatMessageIn {
export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
export type VChatContextName =
| 'conversation'
| 'ai-diagram'
| 'ai-flattener'
| 'beam-scatter'
| 'beam-gather'
| 'call'
| 'persona-extract';
export type VChatContextRef = string;
export interface VChatMessageOut {
role: 'assistant' | 'system' | 'user';
content: string;
@@ -139,6 +149,8 @@ export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = u
export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
contextName: VChatContextName,
contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
abortSignal: AbortSignal,
@@ -161,5 +173,5 @@ export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess =
await new Promise(resolve => setTimeout(resolve, delay));
// execute via the vendor
return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, functions, forceFunctionName, abortSignal, onUpdate);
return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
}
@@ -19,7 +19,7 @@ import { OLLAMA_PATH_CHAT, ollamaAccess, ollamaAccessSchema, ollamaChatCompletio
// OpenAI server imports
import type { OpenAIWire } from './openai/openai.wiretypes';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from './openai/openai.router';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
// configuration
@@ -46,11 +46,17 @@ type MuxingFormat = 'sse' | 'json-nl';
*/
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const streamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
const chatStreamingInputSchema = z.object({
access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
model: openAIModelSchema,
history: openAIHistorySchema,
context: streamingContextSchema,
});
export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
@@ -72,14 +78,15 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
// Parse the request
const body = await req.json();
const { access, model, history } = chatStreamingInputSchema.parse(body);
const prettyDialect = serverCapitalizeFirstLetter(access.dialect);
const _chatStreamingInput: ChatStreamingInputSchema = chatStreamingInputSchema.parse(body);
const { dialect: accessDialect } = _chatStreamingInput.access;
const prettyDialect = serverCapitalizeFirstLetter(accessDialect);
// Prepare the upstream API request and demuxer/parser
let requestData: ReturnType<typeof _prepareRequestData>;
try {
requestData = _prepareRequestData(access, model, history);
requestData = _prepareRequestData(_chatStreamingInput);
} catch (error: any) {
console.error(`[POST] /api/llms/stream: ${prettyDialect}: prepareRequestData issue:`, safeErrorString(error));
return new NextResponse(`**[Service Issue] ${prettyDialect}**: ${safeErrorString(error) || 'Unknown streaming error'}`, {
@@ -103,7 +110,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
} catch (error: any) {
// server-side admins message
const capDialect = serverCapitalizeFirstLetter(access.dialect);
const capDialect = serverCapitalizeFirstLetter(accessDialect);
const fetchOrVendorError = safeErrorString(error) + (error?.cause ? ' · ' + JSON.stringify(error.cause) : '');
console.error(`[POST] /api/llms/stream: ${capDialect}: fetch issue:`, fetchOrVendorError, requestData?.url);
@@ -125,7 +132,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
* a 'healthy' level of inventory (i.e., pre-buffering) on the pipe to the client.
*/
const transformUpstreamToBigAgiClient = createUpstreamTransformer(
requestData.vendorMuxingFormat, requestData.vendorStreamParser, access.dialect,
requestData.vendorMuxingFormat, requestData.vendorStreamParser, accessDialect,
);
const chatResponseStream =
@@ -486,7 +493,7 @@ function createStreamParserOpenAI(): AIStreamParser {
}
function _prepareRequestData(access: ChatStreamingInputSchema['access'], model: OpenAIModelSchema, history: OpenAIHistorySchema): {
function _prepareRequestData({ access, model, history, context: _context }: ChatStreamingInputSchema): {
headers: HeadersInit;
url: string;
body: object;
+2 -1
View File
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
import type { ModelDescriptionSchema } from '../server/llm.server.types';
import type { ModelVendorId } from './vendors.registry';
import type { StreamingClientUpdate } from './unifiedStreamingClient';
import type { VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown, TDLLM = DLLM<TSourceSetup, TLLMOptions>> {
@@ -53,6 +53,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contexRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
+7 -1
View File
@@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers';
import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming';
import type { DLLMId } from '../store-llms';
import type { VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { OpenAIAccessSchema } from '../server/openai/openai.router';
import type { OpenAIWire } from '../server/openai/openai.wiretypes';
@@ -29,6 +29,7 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
@@ -55,6 +56,11 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
...(llmResponseTokens ? { maxTokens: llmResponseTokens } : {}),
},
history: messages,
context: {
method: 'chat-stream',
name: contextName, // this errors if the client VChatContextName mismatches the server z.enum
ref: contextRef,
},
};
// connect to the server-side streaming endpoint