Merge branch 'refs/heads/main-stable'

# Conflicts:
#	package-lock.json
#	package.json
#	src/common/util/token-counter.ts
This commit is contained in:
Enrico Ros
2024-06-06 22:11:43 -07:00
18 changed files with 91 additions and 37 deletions
+9 -1
View File
@@ -21,7 +21,15 @@ Or fork & run on Vercel
[//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap)
### What's New in 1.16.1 · May 13, 2024 (minor release, models support)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
+9 -1
View File
@@ -10,7 +10,15 @@ by release.
- milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17)
- work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4)
### What's New in 1.16.1 · May 13, 2024 (minor release, models support)
### What's New in 1.16.2 · Jun 7, 2024 (minor release)
- Improve web downloads, as text, markdown, or HTML
- Proper support for Gemini models
- Added the latest Mistral model
- Tokenizer support for gpt-4o
- Updates to Beam
### What's New in 1.16.1 · May 13, 2024 (minor release)
- Support for the new OpenAI GPT-4o 2024-05-13 model
+4 -4
View File
@@ -1,5 +1,5 @@
import * as React from 'react';
import { shallow } from 'zustand/shallow';
import { useShallow } from 'zustand/react/shallow';
import { Box, Card, ListDivider, ListItemDecorator, MenuItem, Switch, Typography } from '@mui/joy';
import ArrowBackIcon from '@mui/icons-material/ArrowBack';
@@ -99,7 +99,7 @@ export function Telephone(props: {
// external state
const { chatLLMId, chatLLMDropdown } = useChatLLMDropdown();
const { chatTitle, reMessages } = useChatStore(state => {
const { chatTitle, reMessages } = useChatStore(useShallow(state => {
const conversation = props.callIntent.conversationId
? state.conversations.find(conversation => conversation.id === props.callIntent.conversationId) ?? null
: null;
@@ -107,7 +107,7 @@ export function Telephone(props: {
chatTitle: conversation ? conversationTitle(conversation) : null,
reMessages: conversation ? conversation.messages : null,
};
}, shallow);
}));
const persona = SystemPurposes[props.callIntent.personaId as SystemPurposeId] ?? undefined;
const personaCallStarters = persona?.call?.starters ?? undefined;
const personaVoiceId = overridePersonaVoice ? undefined : (persona?.voices?.elevenLabs?.voiceId ?? undefined);
@@ -225,7 +225,7 @@ export function Telephone(props: {
let finalText = '';
let error: any | null = null;
setPersonaTextInterim('💭...');
llmStreamingChatGenerate(chatLLMId, callPrompt, null, null, responseAbortController.current.signal, ({ textSoFar }) => {
llmStreamingChatGenerate(chatLLMId, callPrompt, 'call', callMessages[0].id, null, null, responseAbortController.current.signal, ({ textSoFar }) => {
const text = textSoFar?.trim();
if (text) {
finalText = text;
+6 -2
View File
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import type { DMessage } from '~/common/state/store-chats';
@@ -34,6 +34,8 @@ export async function runAssistantUpdatingState(conversationId: string, history:
const messageStatus = await streamAssistantMessage(
assistantLlmId,
history.map((m): VChatMessageIn => ({ role: m.role, content: m.text })),
'conversation',
conversationId,
parallelViewCount,
autoSpeak,
(update) => cHandler.messageEdit(assistantMessageId, update, false),
@@ -61,6 +63,8 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
export async function streamAssistantMessage(
llmId: DLLMId,
messagesHistory: VChatMessageIn[],
contextName: VChatContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
autoSpeak: ChatAutoSpeakType,
editMessage: (update: Partial<DMessage>) => void,
@@ -92,7 +96,7 @@ export async function streamAssistantMessage(
const incrementalAnswer: Partial<DMessage> = { text: '' };
try {
await llmStreamingChatGenerate(llmId, messagesHistory, null, null, abortSignal, (update: StreamingClientUpdate) => {
await llmStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, (update: StreamingClientUpdate) => {
const textSoFar = update.textSoFar;
// grow the incremental message
+4 -2
View File
@@ -61,9 +61,10 @@ export const NewsItems: NewsItem[] = [
]
}*/
{
versionCode: '1.16.1',
versionCode: '1.16.2',
versionName: 'Crystal Clear',
versionDate: new Date('2024-05-13T19:00:00Z'),
versionDate: new Date('2024-06-07T05:00:00Z'),
// versionDate: new Date('2024-05-13T19:00:00Z'),
// versionDate: new Date('2024-05-09T00:00:00Z'),
versionCoverImage: coverV116,
items: [
@@ -77,6 +78,7 @@ export const NewsItems: NewsItem[] = [
{ text: <>Updated <B>Anthropic</B>*, <B>Groq</B>, <B>Ollama</B>, <B>OpenAI</B>*, <B>OpenRouter</B>*, and <B>Perplexity</B></> },
{ text: <>Developers: update LLMs data structures</>, dev: true },
{ text: <>1.16.1: Support for <B>OpenAI</B> <B href='https://openai.com/index/hello-gpt-4o/'>GPT-4o</B> (refresh your OpenAI models)</> },
{ text: <>1.16.2: Proper <B>Gemini</B> support, <B>HTML/Markdown</B> downloads, and latest <B>Mistral</B></> },
],
},
{
+1 -1
View File
@@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate';
// update this variable every time you want to broadcast a new version to clients
export const incrementalNewsVersion: number = 16.1;
export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2
interface NewsState {
+7 -3
View File
@@ -1,4 +1,5 @@
import * as React from 'react';
import { v4 as uuidv4 } from 'uuid';
import { Alert, Box, Button, Card, CardContent, CircularProgress, Divider, FormLabel, Grid, IconButton, LinearProgress, Tab, tabClasses, TabList, TabPanel, Tabs, Typography } from '@mui/joy';
import AddIcon from '@mui/icons-material/Add';
@@ -102,8 +103,11 @@ export function Creator(props: { display: boolean }) {
strings: editedInstructions, stringEditors: instructionEditors,
} = useFormEditTextArray(Prompts, PromptTitles);
const creationChainSteps = React.useMemo(() => {
return createChain(editedInstructions, PromptTitles);
const { steps: creationChainSteps, id: chainId } = React.useMemo(() => {
return {
steps: createChain(editedInstructions, PromptTitles),
id: uuidv4(),
};
}, [editedInstructions]);
const llmLabel = personaLlm?.label || undefined;
@@ -122,7 +126,7 @@ export function Creator(props: { display: boolean }) {
chainError,
userCancelChain,
restartChain,
} = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? undefined, savePersona);
} = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? undefined, savePersona, 'persona-extract', chainId);
// Reset the relevant state when the selected tab changes
+3 -3
View File
@@ -68,7 +68,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
const [diagramLlm, llmComponent] = useFormRadioLlmType('Generator', 'chat');
// derived state
const { conversationId, text: subject } = props.config;
const { conversationId, messageId, text: subject } = props.config;
const diagramLlmId = diagramLlm?.id;
@@ -98,7 +98,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
const diagramPrompt = bigDiagramPrompt(diagramType, diagramLanguage, systemMessage.text, subject, customInstruction);
try {
await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, null, null, stepAbortController.signal,
await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, 'ai-diagram', messageId, null, null, stepAbortController.signal,
({ textSoFar }) => textSoFar && setDiagramCode(diagramCode = textSoFar),
);
} catch (error: any) {
@@ -109,7 +109,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi
setAbortController(null);
}
}, [abortController, conversationId, diagramLanguage, diagramLlm, diagramType, subject, customInstruction]);
}, [abortController, conversationId, customInstruction, diagramLanguage, diagramLlm, diagramType, messageId, subject]);
// [Effect] Auto-abort on unmount
+1 -1
View File
@@ -117,7 +117,7 @@ export function FlattenerModal(props: {
await startStreaming(llm.id, [
{ role: 'system', content: flattenProfile.systemPrompt },
{ role: 'user', content: encodeConversationAsUserMessage(flattenProfile.userPrompt, messages) },
]);
], 'ai-flattener', messages[0].id);
}, [llm, props.conversationId, startStreaming]);
+4 -4
View File
@@ -1,7 +1,7 @@
import * as React from 'react';
import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
// set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
/**
* React hook to manage a chain of LLM transformations.
*/
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess?: (output: string, input: string) => void) {
export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
// state
const [chain, setChain] = React.useState<ChainState | null>(null);
@@ -114,7 +114,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch
setChainStepInterimText(null);
// LLM call (streaming, cancelable)
llmStreamingChatGenerate(llmId, llmChatInput, null, null, stepAbortController.signal,
llmStreamingChatGenerate(llmId, llmChatInput, contextName, contextRef, null, null, stepAbortController.signal,
({ textSoFar }) => {
textSoFar && setChainStepInterimText(interimText = textSoFar);
})
@@ -141,7 +141,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch
stepAbortController.abort('step aborted');
_chainAbortController.signal.removeEventListener('abort', globalToStepListener);
};
}, [chain, llmId, onSuccess]);
}, [chain, contextRef, contextName, llmId, onSuccess]);
return {
+3 -3
View File
@@ -1,7 +1,7 @@
import * as React from 'react';
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
const abortControllerRef = React.useRef<AbortController | null>(null);
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[]) => {
const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
setStreamError(null);
setPartialText(null);
setText(null);
@@ -24,7 +24,7 @@ export function useStreamChatText() {
try {
let lastText = '';
await llmStreamingChatGenerate(llmId, prompt, null, null, abortControllerRef.current.signal, ({ textSoFar }) => {
await llmStreamingChatGenerate(llmId, prompt, contextName, contextRef, null, null, abortControllerRef.current.signal, ({ textSoFar }) => {
if (textSoFar) {
lastText = textSoFar;
setPartialText(lastText);
@@ -96,7 +96,7 @@ export async function executeChatGenerate(_i: ChatGenerateInstruction, inputs: E
};
// LLM Streaming generation
return streamAssistantMessage(inputs.llmId, history, getUXLabsHighPerformance() ? 0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal)
return streamAssistantMessage(inputs.llmId, history, 'beam-gather', inputs.contextRef, getUXLabsHighPerformance() ? 0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal)
.then((status) => {
// re-throw errors, as streamAssistantMessage catches internally
if (status.outcome === 'aborted') {
@@ -23,6 +23,7 @@ export interface ExecutionInputState {
readonly chatMessages: DMessage[];
readonly rayMessages: DMessage[];
readonly llmId: DLLMId;
readonly contextRef: string; // not useful
// interaction
readonly chainAbortController: AbortController;
readonly updateProgressComponent: (component: React.ReactNode) => void;
@@ -67,6 +68,7 @@ export function gatherStartFusion(
chatMessages: chatMessages,
rayMessages: rayMessages,
llmId: initialFusion.llmId,
contextRef: initialFusion.fusionId,
// interaction
chainAbortController: new AbortController(),
updateProgressComponent: (component: React.ReactNode) => onUpdateBFusion({ fusingProgressComponent: component }),
+1 -1
View File
@@ -67,7 +67,7 @@ function rayScatterStart(ray: BRay, llmId: DLLMId | null, inputHistory: DMessage
// stream the assistant's messages
const messagesHistory: VChatMessageIn[] = inputHistory.map(({ role, text }) => ({ role, content: text }));
streamAssistantMessage(llmId, messagesHistory, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal)
streamAssistantMessage(llmId, messagesHistory, 'beam-scatter', ray.rayId, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal)
.then((status) => {
_rayUpdate(ray.rayId, {
status: (status.outcome === 'success') ? 'success'
+13 -1
View File
@@ -21,6 +21,16 @@ export interface VChatMessageIn {
export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
export type VChatContextName =
| 'conversation'
| 'ai-diagram'
| 'ai-flattener'
| 'beam-scatter'
| 'beam-gather'
| 'call'
| 'persona-extract';
export type VChatContextRef = string;
export interface VChatMessageOut {
role: 'assistant' | 'system' | 'user';
content: string;
@@ -139,6 +149,8 @@ export async function llmChatGenerateOrThrow<TSourceSetup = unknown, TAccess = u
export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown>(
llmId: DLLMId,
messages: VChatMessageIn[],
contextName: VChatContextName,
contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null,
forceFunctionName: string | null,
abortSignal: AbortSignal,
@@ -161,5 +173,5 @@ export async function llmStreamingChatGenerate<TSourceSetup = unknown, TAccess =
await new Promise(resolve => setTimeout(resolve, delay));
// execute via the vendor
return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, functions, forceFunctionName, abortSignal, onUpdate);
return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
}
@@ -19,7 +19,7 @@ import { OLLAMA_PATH_CHAT, ollamaAccess, ollamaAccessSchema, ollamaChatCompletio
// OpenAI server imports
import type { OpenAIWire } from './openai/openai.wiretypes';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from './openai/openai.router';
import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
// configuration
@@ -46,11 +46,17 @@ type MuxingFormat = 'sse' | 'json-nl';
*/
type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
const streamingContextSchema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
const chatStreamingInputSchema = z.object({
access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
model: openAIModelSchema,
history: openAIHistorySchema,
context: streamingContextSchema,
});
export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
@@ -72,14 +78,15 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
// Parse the request
const body = await req.json();
const { access, model, history } = chatStreamingInputSchema.parse(body);
const prettyDialect = serverCapitalizeFirstLetter(access.dialect);
const _chatStreamingInput: ChatStreamingInputSchema = chatStreamingInputSchema.parse(body);
const { dialect: accessDialect } = _chatStreamingInput.access;
const prettyDialect = serverCapitalizeFirstLetter(accessDialect);
// Prepare the upstream API request and demuxer/parser
let requestData: ReturnType<typeof _prepareRequestData>;
try {
requestData = _prepareRequestData(access, model, history);
requestData = _prepareRequestData(_chatStreamingInput);
} catch (error: any) {
console.error(`[POST] /api/llms/stream: ${prettyDialect}: prepareRequestData issue:`, safeErrorString(error));
return new NextResponse(`**[Service Issue] ${prettyDialect}**: ${safeErrorString(error) || 'Unknown streaming error'}`, {
@@ -103,7 +110,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
} catch (error: any) {
// server-side admins message
const capDialect = serverCapitalizeFirstLetter(access.dialect);
const capDialect = serverCapitalizeFirstLetter(accessDialect);
const fetchOrVendorError = safeErrorString(error) + (error?.cause ? ' · ' + JSON.stringify(error.cause) : '');
console.error(`[POST] /api/llms/stream: ${capDialect}: fetch issue:`, fetchOrVendorError, requestData?.url);
@@ -125,7 +132,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise<Respon
* a 'healthy' level of inventory (i.e., pre-buffering) on the pipe to the client.
*/
const transformUpstreamToBigAgiClient = createUpstreamTransformer(
requestData.vendorMuxingFormat, requestData.vendorStreamParser, access.dialect,
requestData.vendorMuxingFormat, requestData.vendorStreamParser, accessDialect,
);
const chatResponseStream =
@@ -486,7 +493,7 @@ function createStreamParserOpenAI(): AIStreamParser {
}
function _prepareRequestData(access: ChatStreamingInputSchema['access'], model: OpenAIModelSchema, history: OpenAIHistorySchema): {
function _prepareRequestData({ access, model, history, context: _context }: ChatStreamingInputSchema): {
headers: HeadersInit;
url: string;
body: object;
+2 -1
View File
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
import type { ModelDescriptionSchema } from '../server/llm.server.types';
import type { ModelVendorId } from './vendors.registry';
import type { StreamingClientUpdate } from './unifiedStreamingClient';
import type { VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOptions = unknown, TDLLM = DLLM<TSourceSetup, TLLMOptions>> {
@@ -53,6 +53,7 @@ export interface IModelVendor<TSourceSetup = unknown, TAccess = unknown, TLLMOpt
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contexRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
+7 -1
View File
@@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers';
import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming';
import type { DLLMId } from '../store-llms';
import type { VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client';
import type { OpenAIAccessSchema } from '../server/openai/openai.router';
import type { OpenAIWire } from '../server/openai/openai.wiretypes';
@@ -29,6 +29,7 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
llmId: DLLMId,
llmOptions: TLLMOptions,
messages: VChatMessageIn[],
contextName: VChatContextName, contextRef: VChatContextRef,
functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
@@ -55,6 +56,11 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
...(llmResponseTokens ? { maxTokens: llmResponseTokens } : {}),
},
history: messages,
context: {
method: 'chat-stream',
name: contextName, // this errors if the client VChatContextName mismatches the server z.enum
ref: contextRef,
},
};
// connect to the server-side streaming endpoint