From 02ca84a467c548e5fddb92d2a9a91dd77fa2ba5c Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Mon, 15 Jul 2024 21:49:39 -0700 Subject: [PATCH] AIX: Massive updates --- src/apps/chat/editors/chat-persona.ts | 15 +- src/common/stores/chat/chat.fragments.ts | 9 +- src/modules/aix/client/aix.client.api.ts | 140 ------------------ ...api.ts => aix.client.fromDMessages.api.ts} | 32 ++-- src/modules/aix/client/aix.client.ts | 59 ++++---- src/modules/aix/server/aix.router.ts | 10 +- src/modules/aix/server/aix.wiretypes.ts | 60 +++++--- .../adapters/anthropic.messageCreate.ts | 69 ++++++--- .../adapters/gemini.generateContent.ts | 63 +++++--- .../adapters/openai.chatCompletions.ts | 51 ++++--- .../chatGenerate/chatGenerate.dispatch.ts | 16 +- .../dispatch/wiretypes/gemini.wiretypes.ts | 29 +++- .../dispatch/wiretypes/openai.wiretypes.ts | 28 ++-- 13 files changed, 271 insertions(+), 310 deletions(-) delete mode 100644 src/modules/aix/client/aix.client.api.ts rename src/modules/aix/client/{aix.client.messages.api.ts => aix.client.fromDMessages.api.ts} (76%) diff --git a/src/apps/chat/editors/chat-persona.ts b/src/apps/chat/editors/chat-persona.ts index ba2f92cc6..4c0de8176 100644 --- a/src/apps/chat/editors/chat-persona.ts +++ b/src/apps/chat/editors/chat-persona.ts @@ -1,11 +1,10 @@ import type { DLLMId } from '~/modules/llms/store-llms'; -import type { AixChatContentGenerateRequest } from '~/modules/aix/client/aix.client.api'; -import type { Intake_ContextChatStream } from '~/modules/aix/server/intake/schemas.intake.api'; +import type { AixAPI_ContextChatStream, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes'; +import { aixChatGenerateRequestFromDMessages } from '~/modules/aix/client/aix.client.fromDMessages.api'; import { aixStreamingChatGenerate, StreamingClientUpdate } from '~/modules/aix/client/aix.client'; import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle'; import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions'; -import { conversationMessagesToAixGenerateRequest } from '~/modules/aix/client/aix.client.messages.api'; import type { DConversationId } from '~/common/stores/chat/chat.conversation'; import { ConversationsManager } from '~/common/chats/ConversationsManager'; @@ -54,7 +53,7 @@ export async function runPersonaOnConversationHead( // stream the assistant's messages directly to the state store - const aixChatContentGenerateRequest = await conversationMessagesToAixGenerateRequest(history); + const aixChatContentGenerateRequest = await aixChatGenerateRequestFromDMessages(history); const messageStatus = await llmGenerateContentStream( assistantLlmId, aixChatContentGenerateRequest, @@ -100,9 +99,9 @@ export type StreamMessageUpdate = Pick, messageComplete: boolean) => void, @@ -119,7 +118,7 @@ export async function llmGenerateContentStream( try { - await aixStreamingChatGenerate(llmId, chatGenerate, intakeContextName, intakeContextRef, abortSignal, + await aixStreamingChatGenerate(llmId, aixChatGenerate, aixContextName, aixContextRef, abortSignal, (update: StreamingClientUpdate, done: boolean) => { // grow the incremental message diff --git a/src/common/stores/chat/chat.fragments.ts b/src/common/stores/chat/chat.fragments.ts index 5b41c7cf2..ff5d3d1db 100644 --- a/src/common/stores/chat/chat.fragments.ts +++ b/src/common/stores/chat/chat.fragments.ts @@ -76,7 +76,7 @@ export type DMessageDocPart = { pt: 'doc', type: DMessageDocMimeType, data: DMes export type DMessageErrorPart = { pt: 'error', error: string }; export type DMessageImageRefPart = { pt: 'image_ref', dataRef: DMessageDataRef, altText?: string, width?: number, height?: number }; export type DMessageTextPart = { pt: 'text', text: string }; -export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution }; // Note that the definition of tools is in AIX.Intake +export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution }; export type DMessageToolResponsePart = { pt: 'tool_response', id: string, response: DMessageToolResponseFunctionCall | DMessageToolResponseCodeExecution, error?: boolean | string, _environment?: DMessageToolEnvironment }; export type DMetaPlaceholderPart = { pt: 'ph', pText: string }; type _DMetaSentinelPart = { pt: '_pt_sentinel' }; @@ -114,7 +114,8 @@ type DMessageToolInvocationFunctionCall = { type DMessageToolInvocationCodeExecution = { type: 'code_execution'; variant?: 'gemini_auto_inline'; - arguments: { code: string; language?: string; }; + language?: string; + code: string; }; type DMessageToolEnvironment = 'upstream' | 'server' | 'client'; @@ -250,7 +251,7 @@ function createDMessageFunctionCallInvocationPart(id: string, name: string, args } function createDMessageCodeExecutionInvocationPart(id: string, code: string, language?: string, variant?: 'gemini_auto_inline'): DMessageToolInvocationPart { - return { pt: 'tool_call', id, call: { type: 'code_execution', variant, arguments: { code, language } } }; + return { pt: 'tool_call', id, call: { type: 'code_execution', variant, language, code } }; } function createDMessageFunctionCallResponsePart(id: string, result: string, _name?: string, error?: boolean | string, _environment?: DMessageToolEnvironment): DMessageToolResponsePart { @@ -329,7 +330,7 @@ function _duplicatePart Client -- (intake request) --> Server -- (dispatch request) --> AI Service -// -// ** MUST Keep the __intake__ schemas in sync ** -// - -// Chat Content Generation - Request Schema - -export interface AixChatContentGenerateRequest { - systemMessage?: AixSystemMessage; - chatSequence: AixChatMessage[]; - tools?: AixToolDefinition[]; - toolPolicy?: AixToolPolicy; -} - -// export interface GenerationParameters { -// model?: string; -// temperature?: number; -// maxTokens?: number; -// } - - -// System Message Schema - -interface AixSystemMessage { - parts: DMessageTextPart[]; -} - - -// Input Message Schema - -export type AixChatMessage = - | AixChatMessageUser - | AixChatMessageModel - | AixChatMessageTool; - -export interface AixChatMessageUser { - role: 'user', - parts: (DMessageTextPart | AixInlineImagePart | DMessageDocPart | AixMetaReplyToPart)[]; -} - -export interface AixChatMessageModel { - role: 'model', - parts: (DMessageTextPart | AixInlineImagePart | DMessageToolInvocationPart)[]; -} - -interface AixChatMessageTool { - role: 'tool', - parts: DMessageToolResponsePart[]; -} - -export function createAixInlineImagePart(base64: string, mimeType: string): AixInlineImagePart { - return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixInlineImagePart['mimeType'], base64 }; -} - -export function createAixMetaReplyToPart(replyTo: string): AixMetaReplyToPart { - return { pt: 'meta_reply_to', replyTo }; -} - - -// AIX Tools - -export type AixToolDefinition = - | AixToolFunctionCallDefinition - | AixToolGeminiCodeInterpreter - | AixToolPreprocessor; - -export type AixToolPolicy = - | { type: 'any' } - | { type: 'auto' } - | { type: 'function_call'; function_call: { name: string } } - | { type: 'none' }; - - -// AIX Tools > Function Call - -interface AixToolFunctionCallDefinition { - type: 'function_call'; - function_call: AixFunctionCall; -} - -interface AixFunctionCall { - /** - * The name of the function to call. Up to 64 characters long, and can only contain letters, numbers, underscores, and hyphens. - */ - name: string; - /** - * 3-4 sentences. Detailed description of what the tool does, when it should be used (and when not), what each parameter means, caveats and limitations. - */ - description: string; - /** - * A JSON Schema object defining the expected parameters for the function call. - * (OpenAI,Google: parameters, Anthropic: input_schema) - */ - input_schema?: { - properties: Record; - required?: string[]; - }; -} - -/** - * The TypeScript definition of an "OpenAPI 3.0.3" "Schema Object". - * This is a subset of the OpenAPI Schema Object, focused on function calling use cases. - */ -interface OpenAPISchemaObject { - type: 'string' | 'number' | 'integer' | 'boolean' | 'array' | 'object'; - description?: string; - nullable?: boolean; - enum?: unknown[]; // Changed from any[] to unknown[] for better type safety - format?: string; - properties?: Record; - required?: string[]; - items?: OpenAPISchemaObject; -} - - -// AIX Tools > Gemini Code Interpreter - -interface AixToolGeminiCodeInterpreter { - type: 'gemini_code_interpreter'; -} - -// AIX Tools > Preprocessor - -interface AixToolPreprocessor { - type: 'preprocessor'; - pname: 'anthropic_artifacts'; -} diff --git a/src/modules/aix/client/aix.client.messages.api.ts b/src/modules/aix/client/aix.client.fromDMessages.api.ts similarity index 76% rename from src/modules/aix/client/aix.client.messages.api.ts rename to src/modules/aix/client/aix.client.fromDMessages.api.ts index 27caffb70..d7562c65a 100644 --- a/src/modules/aix/client/aix.client.messages.api.ts +++ b/src/modules/aix/client/aix.client.fromDMessages.api.ts @@ -4,8 +4,8 @@ import type { DMessage } from '~/common/stores/chat/chat.message'; import { DMessageImageRefPart, isContentFragment, isContentOrAttachmentFragment, isTextPart } from '~/common/stores/chat/chat.fragments'; import { LLMImageResizeMode, resizeBase64ImageIfNeeded } from '~/common/util/imageUtils'; -import { AixChatContentGenerateRequest, AixChatMessageModel, AixChatMessageUser, createAixInlineImagePart, createAixMetaReplyToPart } from './aix.client.api'; - +// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions +import type { AixAPIChatGenerate_Request, AixMessages_ModelMessage, AixMessages_UserMessage, AixParts_InlineImagePart, AixParts_MetaReplyToPart } from '../server/aix.wiretypes'; // TODO: remove console messages to zero, or replace with throws or something @@ -19,7 +19,7 @@ export const MODEL_IMAGE_RESCALE_QUALITY = 0.90; // AIX <> Chat Messages API helpers // -export async function conversationMessagesToAixGenerateRequest(messageSequence: Readonly): Promise { +export async function aixChatGenerateRequestFromDMessages(messageSequence: Readonly): Promise { // reduce history return await messageSequence.reduce(async (accPromise, m, index) => { const acc = await accPromise; @@ -76,11 +76,11 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence: console.warn('conversationMessagesToAixGenerateRequest: unexpected User fragment part type', (uFragment.part as any).pt); } return uMsg; - }, Promise.resolve({ role: 'user', parts: [] } as AixChatMessageUser)); + }, Promise.resolve({ role: 'user', parts: [] } as AixMessages_UserMessage)); // handle metadata on user messages if (m.metadata?.inReplyToText) - aixChatMessageUser.parts.push(createAixMetaReplyToPart(m.metadata.inReplyToText)); + aixChatMessageUser.parts.push(_clientCreateAixMetaReplyToPart(m.metadata.inReplyToText)); acc.chatSequence.push(aixChatMessageUser); @@ -95,9 +95,10 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence: switch (aFragment.part.pt) { - // intake.message.part = fragment.part case 'text': case 'tool_call': + // Key place where the Aix Zod inferred types are compared to the Typescript defined DMessagePart* types + // - in case of error, check that the types in `chat.fragments.ts` and `aix.wiretypes.ts` are in sync mMsg.parts.push(aFragment.part); break; @@ -125,7 +126,7 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence: } return mMsg; - }, Promise.resolve({ role: 'model', parts: [] } as AixChatMessageModel)); + }, Promise.resolve({ role: 'model', parts: [] } as AixMessages_ModelMessage)); acc.chatSequence.push(aixChatMessageModel); @@ -135,10 +136,13 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence: } return acc; - }, Promise.resolve({ chatSequence: [] } as AixChatContentGenerateRequest)); + }, Promise.resolve({ chatSequence: [] } as AixAPIChatGenerate_Request)); } -async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false) { + +/// Parts that differ from DMessage*Part to AIX + +async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false): Promise { // validate const { dataRef } = imageRefPart; @@ -164,5 +168,13 @@ async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageR } } - return createAixInlineImagePart(base64Data, mimeType || dataRef.mimeType); + return _clientCreateAixInlineImagePart(base64Data, mimeType || dataRef.mimeType); +} + +function _clientCreateAixInlineImagePart(base64: string, mimeType: string): AixParts_InlineImagePart { + return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixParts_InlineImagePart['mimeType'], base64 }; +} + +function _clientCreateAixMetaReplyToPart(replyTo: string): AixParts_MetaReplyToPart { + return { pt: 'meta_reply_to', replyTo }; } diff --git a/src/modules/aix/client/aix.client.ts b/src/modules/aix/client/aix.client.ts index 59bb2016b..a82cbe40c 100644 --- a/src/modules/aix/client/aix.client.ts +++ b/src/modules/aix/client/aix.client.ts @@ -4,9 +4,8 @@ import { findVendorForLlmOrThrow } from '~/modules/llms/vendors/vendors.registry import { apiStream } from '~/common/util/trpc.client'; -import type { Intake_Access, Intake_ContextChatStream, Intake_Model } from '../server/intake/schemas.intake.api'; - -import type { AixChatContentGenerateRequest } from './aix.client.api'; +// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions +import type { AixAPI_Access, AixAPI_ContextChatStream, AixAPI_Model, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes'; export type StreamingClientUpdate = Partial<{ @@ -18,48 +17,44 @@ export type StreamingClientUpdate = Partial<{ export async function aixStreamingChatGenerate( llmId: DLLMId, - chatGenerate: AixChatContentGenerateRequest, - intakeContextName: Intake_ContextChatStream['name'], - intakeContextRef: string, + aixChatGenerate: AixAPIChatGenerate_Request, + aixContextName: AixAPI_ContextChatStream['name'], + aixContextRef: AixAPI_ContextChatStream['ref'], abortSignal: AbortSignal, onUpdate: (update: StreamingClientUpdate, done: boolean) => void, ): Promise { - // id to DLLM and vendor + // Aix Access const { llm, vendor } = findVendorForLlmOrThrow(llmId); - - // FIXME: relax the forced cast - // const llmOptions = llm.options; - const intakeModel = intakeModelFromLLMOptions(llm.options, llmId); - - // get the access const partialSourceSetup = llm._source.setup; - const intakeAccess = vendor.getTransportAccess(partialSourceSetup); + const aixAccess = vendor.getTransportAccess(partialSourceSetup); - // get any vendor-specific rate limit delay + // Aix Model - FIXME: relax the forced cast + // const llmOptions = llm.options; + const aixModel = _aixModelFromLLMOptions(llm.options, llmId); + + // Aix Context + const aixContext = { method: 'chat-stream', name: aixContextName, ref: aixContextRef } as const; + + // Simple rate limiting (vendor-specific) const delay = vendor.getRateLimitDelay?.(llm, partialSourceSetup) ?? 0; if (delay > 0) await new Promise(resolve => setTimeout(resolve, delay)); // [OpenAI-only] check for harmful content with the free 'moderation' API, if the user requests so - // if (intakeAccess.dialect === 'openai' && intakeAccess.moderationCheck) { - // const moderationUpdate = await _openAIModerationCheck(intakeAccess, messages.at(-1) ?? null); + // if (aixAccess.dialect === 'openai' && aixAccess.moderationCheck) { + // const moderationUpdate = await _openAIModerationCheck(aixAccess, messages.at(-1) ?? null); // if (moderationUpdate) // return onUpdate({ textSoFar: moderationUpdate, typing: false }, true); // } - // execute via the vendor - // return await vendor.streamingChatGenerateOrThrow(intakeAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate); - const intakeContext = intakeContextChatStream(intakeContextName, intakeContextRef); - return await _aixStreamGenerateUnified(intakeAccess, intakeModel, chatGenerate, intakeContext, abortSignal, onUpdate); + // return await vendor.streamingChatGenerateOrThrow(aixAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate); + return await _aixChatGenerateContent(aixAccess, aixModel, aixChatGenerate, aixContext, abortSignal, onUpdate); } -function intakeContextChatStream(name: Intake_ContextChatStream['name'], ref: string): Intake_ContextChatStream { - return { method: 'chat-stream', name, ref }; -} -function intakeModelFromLLMOptions(llmOptions: Record, debugLlmId: string): Intake_Model { +function _aixModelFromLLMOptions(llmOptions: Record, debugLlmId: string): AixAPI_Model { // model params (llm) const { llmRef, llmTemperature, llmResponseTokens } = llmOptions || {}; if (!llmRef || llmTemperature === undefined) @@ -82,19 +77,19 @@ function intakeModelFromLLMOptions(llmOptions: Record, debugLlmId: * * NOTE: onUpdate is callback when a piece of a message (text, model name, typing..) is received */ -async function _aixStreamGenerateUnified( - // input - access: Intake_Access, - model: Intake_Model, - chatGenerate: AixChatContentGenerateRequest, - context: Intake_ContextChatStream, +async function _aixChatGenerateContent( + // aix inputs + aixAccess: AixAPI_Access, + aixModel: AixAPI_Model, + aixChatGenerate: AixAPIChatGenerate_Request, + aixContext: AixAPI_ContextChatStream, // others abortSignal: AbortSignal, onUpdate: (update: StreamingClientUpdate, done: boolean) => void, ): Promise { const operation = await apiStream.aix.chatGenerateContent.mutate( - { access, model, chatGenerate, context, streaming: true, _debugRequestBody: false }, + { access: aixAccess, model: aixModel, chatGenerate: aixChatGenerate, context: aixContext, streaming: true, _debugRequestBody: false }, { signal: abortSignal }, ); diff --git a/src/modules/aix/server/aix.router.ts b/src/modules/aix/server/aix.router.ts index 03389c821..6828359c2 100644 --- a/src/modules/aix/server/aix.router.ts +++ b/src/modules/aix/server/aix.router.ts @@ -4,10 +4,10 @@ import { createEmptyReadableStream, safeErrorString, serverCapitalizeFirstLetter import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server'; import { fetchResponseOrTRPCThrow } from '~/server/api/trpc.router.fetchers'; +import { AixWire_API, AixWire_API_ChatGenerate } from './aix.wiretypes'; import { IntakeHandler } from './intake/IntakeHandler'; import { createChatGenerateDispatch } from './dispatch/chatGenerate/chatGenerate.dispatch'; import { createStreamDemuxer } from './dispatch/stream.demuxers'; -import { intake_Access_schema, intake_ChatGenerateRequest_schema, intake_ContextChatStream_schema, intake_Model_schema } from './intake/schemas.intake.api'; export const aixRouter = createTRPCRouter({ @@ -18,10 +18,10 @@ export const aixRouter = createTRPCRouter({ */ chatGenerateContent: publicProcedure .input(z.object({ - access: intake_Access_schema, - model: intake_Model_schema, - chatGenerate: intake_ChatGenerateRequest_schema, - context: intake_ContextChatStream_schema, + access: AixWire_API.Access_schema, + model: AixWire_API.Model_schema, + chatGenerate: AixWire_API_ChatGenerate.Request_schema, + context: AixWire_API.Context_schema, streaming: z.boolean(), _debugRequestBody: z.boolean().optional(), })) diff --git a/src/modules/aix/server/aix.wiretypes.ts b/src/modules/aix/server/aix.wiretypes.ts index 3f9eed264..577970074 100644 --- a/src/modules/aix/server/aix.wiretypes.ts +++ b/src/modules/aix/server/aix.wiretypes.ts @@ -14,11 +14,22 @@ import { openAIAccessSchema } from '~/modules/llms/server/openai/openai.router'; // Export types -// export type IntakeWire_DocPart = z.infer; -// export type IntakeWire_InlineImagePart = z.infer; -// export type IntakeWire_MetaReplyToPart = z.infer; -// export type IntakeWire_ChatMessage = z.infer; -// export type IntakeWire_SystemMessage = z.infer; +export type AixParts_DocPart = z.infer; +export type AixParts_InlineImagePart = z.infer; +export type AixParts_MetaReplyToPart = z.infer; + +export type AixMessages_SystemMessage = z.infer; +export type AixMessages_UserMessage = z.infer; +export type AixMessages_ModelMessage = z.infer; +export type AixMessages_ChatMessage = z.infer; + +export type AixTools_ToolDefinition = z.infer; +export type AixTools_ToolsPolicy = z.infer; + +export type AixAPI_Access = z.infer; +export type AixAPI_ContextChatStream = z.infer; +export type AixAPI_Model = z.infer; +export type AixAPIChatGenerate_Request = z.infer; export namespace OpenAPI_Schema { @@ -137,10 +148,8 @@ export namespace AixWire_Parts { const _CodeExecutionInvocation_schema = z.object({ type: z.literal('code_execution'), variant: z.literal('gemini_auto_inline').optional(), - arguments: z.object({ - code: z.string(), - language: z.string().optional(), - }), + language: z.string().optional(), + code: z.string(), }); export const ToolInvocationPart_schema = z.object({ @@ -157,7 +166,7 @@ export namespace AixWire_Parts { const _FunctionCallResponse_schema = z.object({ type: z.literal('function_call'), result: z.string(), - // _name: z.string().optional(), + _name: z.string().optional(), }); const _CodeExecutionResponse_schema = z.object({ @@ -196,7 +205,7 @@ export namespace AixWire_Messages { /// Chat Message - const _UserMessage_schema = z.object({ + export const UserMessage_schema = z.object({ role: z.literal('user'), parts: z.array(z.discriminatedUnion('pt', [ AixWire_Parts.TextPart_schema, @@ -206,7 +215,7 @@ export namespace AixWire_Messages { ])), }); - const _ModelMessage_schema = z.object({ + export const ModelMessage_schema = z.object({ role: z.literal('model'), parts: z.array(z.discriminatedUnion('pt', [ AixWire_Parts.TextPart_schema, @@ -215,7 +224,7 @@ export namespace AixWire_Messages { ])), }); - const _ToolMessage_schema = z.object({ + export const ToolMessage_schema = z.object({ role: z.literal('tool'), parts: z.array(z.discriminatedUnion('pt', [ AixWire_Parts.ToolResponsePart_schema, @@ -223,9 +232,9 @@ export namespace AixWire_Messages { }); export const ChatMessage_schema = z.discriminatedUnion('role', [ - _UserMessage_schema, - _ModelMessage_schema, - _ToolMessage_schema, + UserMessage_schema, + ModelMessage_schema, + ToolMessage_schema, ]); } @@ -339,6 +348,18 @@ export namespace AixWire_API { maxTokens: z.number().min(1).max(1000000).optional(), }); + /// Context + + export const ContextChatStream_schema = z.object({ + method: z.literal('chat-stream'), + name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']), + ref: z.string(), + }); + + export const Context_schema = z.discriminatedUnion('method', [ + ContextChatStream_schema, + ]); + } export namespace AixWire_API_ChatGenerate { @@ -352,12 +373,6 @@ export namespace AixWire_API_ChatGenerate { toolsPolicy: AixWire_Tools.ToolsPolicy_schema.optional(), }); - export const Context_ChatStream_schema = z.object({ - method: z.literal('chat-stream'), - name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']), - ref: z.string(), - }); - /// Response - Events Stream // const AixEventProto_schema = z.union([ @@ -375,3 +390,4 @@ export namespace AixWire_API_ChatGenerate { // }); } + diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts index 3677d1fb3..9c894daea 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/anthropic.messageCreate.ts @@ -1,7 +1,5 @@ -import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api'; -import type { Intake_ChatMessage } from '../../../intake/schemas.intake.messages'; -import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools'; -import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '~/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes'; +import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes'; +import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '../../wiretypes/anthropic.wiretypes'; // configuration @@ -12,7 +10,7 @@ const hotFixMissingTokens = 4096; // [2024-07-12] max from https://docs.anthropi type TRequest = AnthropicWire_API_Message_Create.Request; -export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): TRequest { +export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest { // Convert the system message const systemMessage: TRequest['system'] = chatGenerate.systemMessage?.parts.length @@ -22,8 +20,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate // Transform the chat messages into Anthropic's format const chatMessages: TRequest['messages'] = []; let currentMessage: TRequest['messages'][number] | null = null; - for (const intakeChatMessage of chatGenerate.chatSequence) { - for (const { role, content } of _generateAnthropicMessagesContentBlocks(intakeChatMessage)) { + for (const aixMessage of chatGenerate.chatSequence) { + for (const { role, content } of _generateAnthropicMessagesContentBlocks(aixMessage)) { if (!currentMessage || currentMessage.role !== role) { if (currentMessage) chatMessages.push(currentMessage); @@ -41,8 +39,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate model: model.id, system: systemMessage, messages: chatMessages, - tools: chatGenerate.tools && _intakeToAnthropicTools(chatGenerate.tools), - tool_choice: chatGenerate.toolsPolicy && _intakeToAnthropicToolChoice(chatGenerate.toolsPolicy), + tools: chatGenerate.tools && _toAnthropicTools(chatGenerate.tools), + tool_choice: chatGenerate.toolsPolicy && _toAnthropicToolChoice(chatGenerate.toolsPolicy), // metadata: { user_id: ... } // stop_sequences: undefined, stream: streaming, @@ -60,7 +58,7 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate } -function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMessage): Generator<{ +function* _generateAnthropicMessagesContentBlocks({ parts, role }: AixMessages_ChatMessage): Generator<{ role: 'user' | 'assistant', content: TRequest['messages'][number]['content'][number] }> { @@ -75,21 +73,27 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe } switch (role) { + case 'user': for (const part of parts) { switch (part.pt) { + case 'text': yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(part.text) }; break; + case 'inline_image': yield { role: 'user', content: AnthropicWire_Blocks.ImageBlock(part.mimeType, part.base64) }; break; + case 'doc': yield { role: 'user', content: AnthropicWire_Blocks.TextBlock('```' + (part.ref || '') + '\n' + part.data.text + '\n```\n') }; break; + case 'meta_reply_to': yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(`The user is referring to this in particular: ${part.replyTo}`) }; break; + default: throw new Error(`Unsupported part type in User message: ${(part as any).pt}`); } @@ -99,9 +103,11 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe case 'model': for (const part of parts) { switch (part.pt) { + case 'text': yield { role: 'assistant', content: AnthropicWire_Blocks.TextBlock(part.text) }; break; + case 'inline_image': // Example of mapping a model-generated image to a user message if (hotFixMapModelImagesToUser) { @@ -109,9 +115,22 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe } else throw new Error('Model-generated images are not supported by Anthropic yet'); break; + case 'tool_call': - yield { role: 'assistant', content: AnthropicWire_Blocks.ToolUseBlock(part.id, part.name, part.args) }; + let toolUseBlock; + switch (part.call.type) { + case 'function_call': + toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, part.call.name, part.call.args); + break; + case 'code_execution': + toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, 'execute_code' /* suboptimal */, part.call.code); + break; + default: + throw new Error(`Unsupported tool call type in Model message: ${(part.call as any).type}`); + } + yield { role: 'assistant', content: toolUseBlock }; break; + default: throw new Error(`Unsupported part type in Model message: ${(part as any).pt}`); } @@ -121,10 +140,23 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe case 'tool': for (const part of parts) { switch (part.pt) { + case 'tool_response': - const responseContent = part.response ? [AnthropicWire_Blocks.TextBlock(part.response)] : []; - yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, responseContent, part.isError) }; + const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : ''; + switch (part.response.type) { + case 'function_call': + const fcTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)]; + yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, fcTextParts, part.error ? true : undefined) }; + break; + case 'code_execution': + const ceTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)]; + yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, ceTextParts, part.error ? true : undefined) }; + break; + default: + throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`); + } break; + default: throw new Error(`Unsupported part type in Tool message: ${(part as any).pt}`); } @@ -133,9 +165,10 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe } } -function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable { +function _toAnthropicTools(itds: AixTools_ToolDefinition[]): NonNullable { return itds.map(itd => { switch (itd.type) { + case 'function_call': const { name, description, input_schema } = itd.function_call; return { @@ -147,15 +180,15 @@ function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable { +function _toAnthropicToolChoice(itp: AixTools_ToolsPolicy): NonNullable { switch (itp.type) { case 'auto': return { type: 'auto' as const }; diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts index 7f98b8cb6..fb70e568e 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/gemini.generateContent.ts @@ -1,7 +1,4 @@ -import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api'; -import type { Intake_ChatMessage, Intake_DocPart } from '../../../intake/schemas.intake.messages'; -import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools'; - +import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixParts_DocPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes'; import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Messages, GeminiWire_Safety } from '../../wiretypes/gemini.wiretypes'; @@ -9,7 +6,7 @@ import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Me const hotFixImagePartsFirst = true; -export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest { +export function aixToGeminiGenerateContent(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest { // Note: the streaming setting is ignored as it only belongs in the path @@ -19,13 +16,13 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate: : undefined; // Chat Messages - const contents: TRequest['contents'] = _intakeToGeminiContents(chatGenerate.chatSequence); + const contents: TRequest['contents'] = _toGeminiContents(chatGenerate.chatSequence); // Construct the request payload const payload: TRequest = { contents, - tools: chatGenerate.tools && _intakeToGeminiTools(chatGenerate.tools), - toolConfig: chatGenerate.toolsPolicy && _intakeToGeminiToolConfig(chatGenerate.toolsPolicy), + tools: chatGenerate.tools && _toGeminiTools(chatGenerate.tools), + toolConfig: chatGenerate.toolsPolicy && _toGeminiToolConfig(chatGenerate.toolsPolicy), safetySettings: _toGeminiSafetySettings(geminiSafetyThreshold), systemInstruction, generationConfig: { @@ -51,7 +48,7 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate: type TRequest = GeminiWire_API_Generate_Content.Request; -function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire_Messages.Content[] { +function _toGeminiContents(chatSequence: AixMessages_ChatMessage[]): GeminiWire_Messages.Content[] { return chatSequence.map(message => { const parts: GeminiWire_ContentParts.ContentPart[] = []; @@ -83,12 +80,33 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire break; case 'tool_call': - parts.push(GeminiWire_ContentParts.FunctionCallPart(part.name, part.args)); + switch (part.call.type) { + case 'function_call': + parts.push(GeminiWire_ContentParts.FunctionCallPart(part.call.name, part.call.args ?? undefined)); + break; + case 'code_execution': + if (part.call.language?.toLowerCase() !== 'python') + console.warn('Gemini only supports Python code execution, but got:', part.call.language); + parts.push(GeminiWire_ContentParts.ExecutableCodePart('PYTHON', part.call.code)); + break; + default: + throw new Error(`Unsupported tool call type in message: ${(part as any).call.type}`); + } break; case 'tool_response': - parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.name, { response: part.response, isError: part.isError })); - throw new Error('Tool responses are not supported yet - Gemini Expects Objects, but we have a string...'); + const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : ''; + switch (part.response.type) { + case 'function_call': + parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.response._name || part.id, toolErrorPrefix + part.response.result)); + break; + case 'code_execution': + parts.push(GeminiWire_ContentParts.CodeExecutionResultPart(!part.error ? 'OUTCOME_OK' : 'OUTCOME_ERROR', toolErrorPrefix + part.response.result)); + break; + default: + throw new Error(`Unsupported part type in message: ${(part as any).pt}`); + } + break; default: throw new Error(`Unsupported part type in message: ${(part as any).pt}`); @@ -102,7 +120,7 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire }); } -function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable { +function _toGeminiTools(itds: AixTools_ToolDefinition[]): NonNullable { const tools: TRequest['tools'] = []; itds.forEach(itd => { @@ -116,30 +134,29 @@ function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable tool.codeExecution)) throw new Error('Gemini code interpreter already defined'); + tools.push({ codeExecution: {} }); break; - case 'preprocessor': - throw new Error('Preprocessors are not supported yet'); } }); return tools; } -function _intakeToGeminiToolConfig(itp: Intake_ToolsPolicy): NonNullable { +function _toGeminiToolConfig(itp: AixTools_ToolsPolicy): NonNullable { switch (itp.type) { case 'auto': return { functionCallingConfig: { mode: 'AUTO' } }; @@ -167,8 +184,8 @@ function _toGeminiSafetySettings(threshold: GeminiWire_Safety.HarmBlockThreshold // Approximate conversions - alternative approaches should be tried until we find the best one -function _toApproximateGeminiDocPart(intakeDocPart: Intake_DocPart): GeminiWire_ContentParts.ContentPart { - return GeminiWire_ContentParts.TextPart(`\`\`\`${intakeDocPart.ref || ''}\n${intakeDocPart.data.text}\n\`\`\`\n`); +function _toApproximateGeminiDocPart(aixPartsDocPart: AixParts_DocPart): GeminiWire_ContentParts.ContentPart { + return GeminiWire_ContentParts.TextPart(`\`\`\`${aixPartsDocPart.ref || ''}\n${aixPartsDocPart.data.text}\n\`\`\`\n`); } function _toApproximateGeminiReplyTo(replyTo: string): GeminiWire_ContentParts.ContentPart { diff --git a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts index 1a9c7c12c..d7fc0e51f 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/adapters/openai.chatCompletions.ts @@ -1,10 +1,7 @@ -import { OpenAIDialects } from '~/modules/llms/server/openai/openai.router'; +import type { OpenAIDialects } from '~/modules/llms/server/openai/openai.router'; -import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api'; -import type { Intake_ChatMessage, Intake_SystemMessage } from '../../../intake/schemas.intake.messages'; -import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools'; - -import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts } from '../../wiretypes/openai.wiretypes'; +import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixMessages_SystemMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes'; +import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts, OpenAIWire_Messages } from '../../wiretypes/openai.wiretypes'; // @@ -29,7 +26,7 @@ const hotFixSquashTextSeparator = '\n\n\n---\n\n\n'; type TRequest = OpenAIWire_API_Chat_Completions.Request; type TRequestMessages = TRequest['messages']; -export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, jsonOutput: boolean, streaming: boolean): TRequest { +export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest { // Dialect incompatibilities -> Hotfixes const hotFixAlternateUserAssistantRoles = openAIDialect === 'perplexity'; @@ -44,7 +41,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model throw new Error('This service does not support function calls'); // Convert the chat messages to the OpenAI 4-Messages format - let chatMessages = _intakeToOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence); + let chatMessages = _toOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence); // Apply hotfixes if (hotFixSquashMultiPartText) @@ -61,8 +58,8 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model let payload: TRequest = { model: model.id, messages: chatMessages, - tools: chatGenerate.tools && _intakeToOpenAITools(chatGenerate.tools), - tool_choice: chatGenerate.toolsPolicy && _intakeToOpenAIToolChoice(chatGenerate.toolsPolicy), + tools: chatGenerate.tools && _toOpenAITools(chatGenerate.tools), + tool_choice: chatGenerate.toolsPolicy && _toOpenAIToolChoice(chatGenerate.toolsPolicy), parallel_tool_calls: undefined, max_tokens: model.maxTokens !== undefined ? model.maxTokens : undefined, temperature: model.temperature !== undefined ? model.temperature : undefined, @@ -90,6 +87,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model return validated.data; } + function _fixAlternateUserAssistantRoles(chatMessages: TRequestMessages): TRequestMessages { return chatMessages.reduce((acc, historyItem) => { @@ -159,7 +157,7 @@ function _fixSquashMultiPartText(chatMessages: TRequestMessages): TRequestMessag }*/ -function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined, chatSequence: Intake_ChatMessage[]): TRequestMessages { +function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | undefined, chatSequence: AixMessages_ChatMessage[]): TRequestMessages { // Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages) const chatMessages: TRequestMessages = []; @@ -258,8 +256,17 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined // - otherwise we'll add an assistant message with null message // create a new OpenAIWire_ToolCall (specialized to function) - const toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.name, JSON.stringify(part.args)); - + let toolCallPart; + switch (part.call.type) { + case 'function_call': + toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.call.name, part.call.args || ''); + break; + case 'code_execution': + toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, 'execute_code' /* suboptimal */, part.call.code || ''); + break; + default: + throw new Error(`Unsupported tool call type in Model message: ${(part as any).pt}`); + } // Append to existing content[], or new message if (currentMessage?.role === 'assistant') { @@ -283,7 +290,11 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined switch (part.pt) { case 'tool_response': - chatMessages.push({ role: 'tool', tool_call_id: part.id, content: part.isError ? '[ERROR]' + (part.response || '') : (part.response || '') }); + const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : ''; + if (part.response.type === 'function_call' || part.response.type === 'code_execution') + chatMessages.push(OpenAIWire_Messages.ToolMessage(part.id, toolErrorPrefix + part.response.result)); + else + throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`); break; default: @@ -297,10 +308,10 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined return chatMessages; } - -function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable { +function _toOpenAITools(itds: AixTools_ToolDefinition[]): NonNullable { return itds.map(itd => { switch (itd.type) { + case 'function_call': const { name, description, input_schema } = itd.function_call; return { @@ -315,15 +326,15 @@ function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable { +function _toOpenAIToolChoice(itp: AixTools_ToolsPolicy): NonNullable { // NOTE: OpenAI has an additional policy 'none', which we don't have as it behaves like passing no tools at all. // Passing no tools is mandated instead of 'none'. switch (itp.type) { diff --git a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts index c06164862..cc96ccfe9 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/chatGenerate.dispatch.ts @@ -2,13 +2,13 @@ import { anthropicAccess } from '~/modules/llms/server/anthropic/anthropic.route import { geminiAccess } from '~/modules/llms/server/gemini/gemini.router'; import { openAIAccess } from '~/modules/llms/server/openai/openai.router'; -import type { Intake_Access, Intake_ChatGenerateRequest, Intake_Model } from '../../intake/schemas.intake.api'; +import type { AixAPI_Access, AixAPI_Model, AixAPIChatGenerate_Request } from '../../aix.wiretypes'; import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes'; -import { intakeToAnthropicMessageCreate } from './adapters/anthropic.messageCreate'; -import { intakeToGeminiGenerateContent } from './adapters/gemini.generateContent'; -import { intakeToOpenAIMessageCreate } from './adapters/openai.chatCompletions'; +import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate'; +import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent'; +import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions'; import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser'; import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser'; @@ -18,7 +18,7 @@ import type { ChatGenerateParseFunction } from './chatGenerate.types'; import type { StreamDemuxerFormat } from '../stream.demuxers'; -export function createChatGenerateDispatch(access: Intake_Access, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): { +export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): { request: { url: string, headers: HeadersInit, body: object }, demuxerFormat: StreamDemuxerFormat; chatGenerateParse: ChatGenerateParseFunction; @@ -29,7 +29,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_ return { request: { ...anthropicAccess(access, '/v1/messages'), - body: intakeToAnthropicMessageCreate(model, chatGenerate, streaming), + body: aixToAnthropicMessageCreate(model, chatGenerate, streaming), }, demuxerFormat: streaming ? 'sse' : null, chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(), @@ -39,7 +39,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_ return { request: { ...geminiAccess(access, model.id, streaming ? GeminiWire_API_Generate_Content.streamingPostPath : GeminiWire_API_Generate_Content.postPath), - body: intakeToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming), + body: aixToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming), }, demuxerFormat: streaming ? 'sse' : null, chatGenerateParse: createGeminiGenerateContentResponseParser(model.id), @@ -70,7 +70,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_ return { request: { ...openAIAccess(access, model.id, '/v1/chat/completions'), - body: intakeToOpenAIMessageCreate(access.dialect, model, chatGenerate, false, streaming), + body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming), }, demuxerFormat: streaming ? 'sse' : null, chatGenerateParse: createOpenAIMessageCreateParser(), diff --git a/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts index a79ae3227..b1c79a39e 100644 --- a/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts @@ -64,14 +64,18 @@ export namespace GeminiWire_ContentParts { export const FunctionCallPart_schema = z.object({ functionCall: z.object({ name: z.string(), - args: z.record(z.any()).optional(), // JSON object format + // NOTE: replacing with a single string for now, leaving serialization to the caller eventually + args: z.string().optional(), + // args: z.record(z.any()).optional(), // JSON object format }), }); const FunctionResponsePart_schema = z.object({ functionResponse: z.object({ name: z.string(), - response: z.record(z.any()), // Optional. JSON object format + // NOTE: replacing with a single string for now, leaving serialization to the caller eventually + response: z.string().optional(), + // response: z.record(z.any()), // Optional. JSON object format }), }); @@ -123,14 +127,22 @@ export namespace GeminiWire_ContentParts { return { inlineData: { mimeType, data } }; } - export function FunctionCallPart(name: string, args?: Record): z.infer { + export function FunctionCallPart(name: string, args?: string): z.infer { return { functionCall: { name, args } }; } - export function FunctionResponsePart(name: string, response: Record): z.infer { + export function FunctionResponsePart(name: string, response?: string): z.infer { return { functionResponse: { name, response } }; } + export function ExecutableCodePart(language: 'PYTHON', code: string): z.infer { + return { executableCode: { language, code } }; + } + + export function CodeExecutionResultPart(outcome: 'OUTCOME_OK' | 'OUTCOME_ERROR', output?: string): z.infer { + return { codeExecutionResult: { outcome, output } }; + } + } export namespace GeminiWire_Messages { @@ -185,7 +197,14 @@ export namespace GeminiWire_Tools { * https://ai.google.dev/api/rest/v1beta/cachedContents#schema * Here we relax the check. */ - parameters: z.record(z.any()).optional(), + parameters: z.object({ + type: z.literal('object'), + /** + * For stricter validation, use the OpenAPI_Schema.Object_schema + */ + properties: z.record(z.any()).optional(), + required: z.array(z.string()).optional(), + }), }); export const Tool_schema = z.object({ diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index c26337a53..94524e9de 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -1,7 +1,5 @@ import { z } from 'zod'; -import { openAPI_SchemaObject_schema } from '../../intake/schemas.intake.tools'; - // // Implementation notes: @@ -60,14 +58,14 @@ export namespace OpenAIWire_ContentParts { }), }); - export const ToolCall_schema = z.discriminatedUnion('type', [ - PredictedFunctionCall_schema, - ]); - export function PredictedFunctionCall(toolCallId: string, functionName: string, functionArgs: string): z.infer { return { type: 'function', id: toolCallId, function: { name: functionName, arguments: functionArgs } }; } + export const ToolCall_schema = z.discriminatedUnion('type', [ + PredictedFunctionCall_schema, + ]); + } export namespace OpenAIWire_Messages { @@ -107,6 +105,10 @@ export namespace OpenAIWire_Messages { tool_call_id: z.string(), }); + export function ToolMessage(toolCallId: string, content: string): z.infer { + return { role: 'tool', content, tool_call_id: toolCallId }; + } + export const Message_schema = z.discriminatedUnion('role', [ SystemMessage_schema, UserMessage_schema, @@ -138,16 +140,12 @@ export namespace OpenAIWire_Tools { */ parameters: z.object({ type: z.literal('object'), - properties: z.record(openAPI_SchemaObject_schema), - // Note: We commented out the code below in favor of the line above, because the OpenAPI 3.0.3 Schema object - // is in the 'Intake' API spec (.properties), and here we just need to pass that object upstream. - // properties: z.record(z.object({ - // type: z.enum(['string', 'number', 'integer', 'boolean']), - // description: z.string().optional(), - // enum: z.array(z.string()).optional(), - // })), + /** + * For stricter validation, use the OpenAPI_Schema.Object_schema + */ + properties: z.record(z.any()).optional(), required: z.array(z.string()).optional(), - }).optional(), + }), }); export const ToolDefinition_schema = z.discriminatedUnion('type', [