AIX: Massive updates

Enrico Ros
2024-07-15 21:49:39 -07:00
parent 6fd393949c
commit 02ca84a467
13 changed files with 271 additions and 310 deletions
+7 -8
@@ -1,11 +1,10 @@
import type { DLLMId } from '~/modules/llms/store-llms';
import type { AixChatContentGenerateRequest } from '~/modules/aix/client/aix.client.api';
import type { Intake_ContextChatStream } from '~/modules/aix/server/intake/schemas.intake.api';
import type { AixAPI_ContextChatStream, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes';
import { aixChatGenerateRequestFromDMessages } from '~/modules/aix/client/aix.client.fromDMessages.api';
import { aixStreamingChatGenerate, StreamingClientUpdate } from '~/modules/aix/client/aix.client';
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { conversationMessagesToAixGenerateRequest } from '~/modules/aix/client/aix.client.messages.api';
import type { DConversationId } from '~/common/stores/chat/chat.conversation';
import { ConversationsManager } from '~/common/chats/ConversationsManager';
@@ -54,7 +53,7 @@ export async function runPersonaOnConversationHead(
// stream the assistant's messages directly to the state store
const aixChatContentGenerateRequest = await conversationMessagesToAixGenerateRequest(history);
const aixChatContentGenerateRequest = await aixChatGenerateRequestFromDMessages(history);
const messageStatus = await llmGenerateContentStream(
assistantLlmId,
aixChatContentGenerateRequest,
@@ -100,9 +99,9 @@ export type StreamMessageUpdate = Pick<DMessage, 'fragments' | 'originLLM' | 'pe
export async function llmGenerateContentStream(
llmId: DLLMId,
chatGenerate: AixChatContentGenerateRequest,
intakeContextName: Intake_ContextChatStream['name'],
intakeContextRef: string,
aixChatGenerate: AixAPIChatGenerate_Request,
aixContextName: AixAPI_ContextChatStream['name'],
aixContextRef: AixAPI_ContextChatStream['ref'],
parallelViewCount: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
abortSignal: AbortSignal,
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
@@ -119,7 +118,7 @@ export async function llmGenerateContentStream(
try {
await aixStreamingChatGenerate(llmId, chatGenerate, intakeContextName, intakeContextRef, abortSignal,
await aixStreamingChatGenerate(llmId, aixChatGenerate, aixContextName, aixContextRef, abortSignal,
(update: StreamingClientUpdate, done: boolean) => {
// grow the incremental message
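The `parallelViewCount` comment above describes the client-side update throttle: 12 Hz for a single view, reduced by the square root of the number of views rendering in parallel. A minimal sketch of that rule, assuming the frequency formula (the committed implementation is not shown in this diff):

// Sketch only: 12 Hz baseline, divided by sqrt(parallelViewCount).
function throttleIntervalMs(parallelViewCount: number): number {
  if (parallelViewCount < 1) return 0;            // 0: throttling disabled
  const hz = 12 / Math.sqrt(parallelViewCount);   // 1 view -> 12 Hz, 4 views -> 6 Hz
  return Math.round(1000 / hz);                   // 1 view -> ~83 ms, 4 views -> ~167 ms
}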
+5 -4
@@ -76,7 +76,7 @@ export type DMessageDocPart = { pt: 'doc', type: DMessageDocMimeType, data: DMes
export type DMessageErrorPart = { pt: 'error', error: string };
export type DMessageImageRefPart = { pt: 'image_ref', dataRef: DMessageDataRef, altText?: string, width?: number, height?: number };
export type DMessageTextPart = { pt: 'text', text: string };
export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution }; // Note that the definition of tools is in AIX.Intake
export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution };
export type DMessageToolResponsePart = { pt: 'tool_response', id: string, response: DMessageToolResponseFunctionCall | DMessageToolResponseCodeExecution, error?: boolean | string, _environment?: DMessageToolEnvironment };
export type DMetaPlaceholderPart = { pt: 'ph', pText: string };
type _DMetaSentinelPart = { pt: '_pt_sentinel' };
@@ -114,7 +114,8 @@ type DMessageToolInvocationFunctionCall = {
type DMessageToolInvocationCodeExecution = {
type: 'code_execution';
variant?: 'gemini_auto_inline';
arguments: { code: string; language?: string; };
language?: string;
code: string;
};
type DMessageToolEnvironment = 'upstream' | 'server' | 'client';
@@ -250,7 +251,7 @@ function createDMessageFunctionCallInvocationPart(id: string, name: string, args
}
function createDMessageCodeExecutionInvocationPart(id: string, code: string, language?: string, variant?: 'gemini_auto_inline'): DMessageToolInvocationPart {
return { pt: 'tool_call', id, call: { type: 'code_execution', variant, arguments: { code, language } } };
return { pt: 'tool_call', id, call: { type: 'code_execution', variant, language, code } };
}
function createDMessageFunctionCallResponsePart(id: string, result: string, _name?: string, error?: boolean | string, _environment?: DMessageToolEnvironment): DMessageToolResponsePart {
@@ -329,7 +330,7 @@ function _duplicatePart<TPart extends (DMessageContentFragment | DMessageAttachm
if (call.type === 'function_call')
return createDMessageFunctionCallInvocationPart(part.id, call.name, call.args, call._description, call._args_schema ? { ...call._args_schema } : undefined) as TPart;
else
return createDMessageCodeExecutionInvocationPart(part.id, call.arguments.code, call.arguments.language, call.variant) as TPart;
return createDMessageCodeExecutionInvocationPart(part.id, call.code, call.language, call.variant) as TPart;
case 'tool_response':
const response = part.response;
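With this change the code-execution invocation carries `language` and `code` at the top level instead of a nested `arguments` object. A small usage sketch of the helper shown above (the id, code string and language are placeholder values):

const execPart = createDMessageCodeExecutionInvocationPart('call-1', 'print(40 + 2)', 'python', 'gemini_auto_inline');
if (execPart.call.type === 'code_execution')
  console.log(execPart.call.language, execPart.call.code); // 'python' 'print(40 + 2)'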
-140
@@ -1,140 +0,0 @@
import type { DMessageDocPart, DMessageTextPart, DMessageToolInvocationPart, DMessageToolResponsePart } from '~/common/stores/chat/chat.fragments';
import type { Intake_InlineImagePart as AixInlineImagePart, Intake_MetaReplyToPart as AixMetaReplyToPart } from '../server/intake/schemas.intake.messages';
export type { AixInlineImagePart, AixMetaReplyToPart };
// Implementation note: we reuse the 'Parts' from the chat module, and slightly extend/modify it.
// However we may switch to the 'Fragments' concept if we need to uplevel.
//
// This is only the interface between the application and the client (there's no data storage) so
// we can change it without any migration. The client will then issue the server intake request,
// which must match exactly this format.
//
// Chat/Other -- (this schema) --> Client -- (intake request) --> Server -- (dispatch request) --> AI Service
//
// ** MUST Keep the __intake__ schemas in sync **
//
// Chat Content Generation - Request Schema
export interface AixChatContentGenerateRequest {
systemMessage?: AixSystemMessage;
chatSequence: AixChatMessage[];
tools?: AixToolDefinition[];
toolPolicy?: AixToolPolicy;
}
// export interface GenerationParameters {
// model?: string;
// temperature?: number;
// maxTokens?: number;
// }
// System Message Schema
interface AixSystemMessage {
parts: DMessageTextPart[];
}
// Input Message Schema
export type AixChatMessage =
| AixChatMessageUser
| AixChatMessageModel
| AixChatMessageTool;
export interface AixChatMessageUser {
role: 'user',
parts: (DMessageTextPart | AixInlineImagePart | DMessageDocPart | AixMetaReplyToPart)[];
}
export interface AixChatMessageModel {
role: 'model',
parts: (DMessageTextPart | AixInlineImagePart | DMessageToolInvocationPart)[];
}
interface AixChatMessageTool {
role: 'tool',
parts: DMessageToolResponsePart[];
}
export function createAixInlineImagePart(base64: string, mimeType: string): AixInlineImagePart {
return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixInlineImagePart['mimeType'], base64 };
}
export function createAixMetaReplyToPart(replyTo: string): AixMetaReplyToPart {
return { pt: 'meta_reply_to', replyTo };
}
// AIX Tools
export type AixToolDefinition =
| AixToolFunctionCallDefinition
| AixToolGeminiCodeInterpreter
| AixToolPreprocessor;
export type AixToolPolicy =
| { type: 'any' }
| { type: 'auto' }
| { type: 'function_call'; function_call: { name: string } }
| { type: 'none' };
// AIX Tools > Function Call
interface AixToolFunctionCallDefinition {
type: 'function_call';
function_call: AixFunctionCall;
}
interface AixFunctionCall {
/**
* The name of the function to call. Up to 64 characters long, and can only contain letters, numbers, underscores, and hyphens.
*/
name: string;
/**
* 3-4 sentences. Detailed description of what the tool does, when it should be used (and when not), what each parameter means, caveats and limitations.
*/
description: string;
/**
* A JSON Schema object defining the expected parameters for the function call.
* (OpenAI,Google: parameters, Anthropic: input_schema)
*/
input_schema?: {
properties: Record<string, OpenAPISchemaObject>;
required?: string[];
};
}
/**
* The TypeScript definition of an "OpenAPI 3.0.3" "Schema Object".
* This is a subset of the OpenAPI Schema Object, focused on function calling use cases.
*/
interface OpenAPISchemaObject {
type: 'string' | 'number' | 'integer' | 'boolean' | 'array' | 'object';
description?: string;
nullable?: boolean;
enum?: unknown[]; // Changed from any[] to unknown[] for better type safety
format?: string;
properties?: Record<string, OpenAPISchemaObject>;
required?: string[];
items?: OpenAPISchemaObject;
}
// AIX Tools > Gemini Code Interpreter
interface AixToolGeminiCodeInterpreter {
type: 'gemini_code_interpreter';
}
// AIX Tools > Preprocessor
interface AixToolPreprocessor {
type: 'preprocessor';
pname: 'anthropic_artifacts';
}
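This hand-written client request API is deleted; its role is taken over by the Zod schemas in `aix.wiretypes.ts` (further below), whose inferred types the client now imports with `import type`. A minimal sketch of that pattern, using an illustrative schema rather than one of the project's actual ones:

import { z } from 'zod';

// Define the wire schema once on the server...
const ExamplePart_schema = z.object({
  pt: z.literal('text'),
  text: z.string(),
});

// ...and export the inferred type, so the client can `import type` it without
// pulling any server code into the bundle.
export type ExamplePart = z.infer<typeof ExamplePart_schema>;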
@@ -4,8 +4,8 @@ import type { DMessage } from '~/common/stores/chat/chat.message';
import { DMessageImageRefPart, isContentFragment, isContentOrAttachmentFragment, isTextPart } from '~/common/stores/chat/chat.fragments';
import { LLMImageResizeMode, resizeBase64ImageIfNeeded } from '~/common/util/imageUtils';
import { AixChatContentGenerateRequest, AixChatMessageModel, AixChatMessageUser, createAixInlineImagePart, createAixMetaReplyToPart } from './aix.client.api';
// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions
import type { AixAPIChatGenerate_Request, AixMessages_ModelMessage, AixMessages_UserMessage, AixParts_InlineImagePart, AixParts_MetaReplyToPart } from '../server/aix.wiretypes';
// TODO: remove console messages to zero, or replace with throws or something
@@ -19,7 +19,7 @@ export const MODEL_IMAGE_RESCALE_QUALITY = 0.90;
// AIX <> Chat Messages API helpers
//
export async function conversationMessagesToAixGenerateRequest(messageSequence: Readonly<DMessage[]>): Promise<AixChatContentGenerateRequest> {
export async function aixChatGenerateRequestFromDMessages(messageSequence: Readonly<DMessage[]>): Promise<AixAPIChatGenerate_Request> {
// reduce history
return await messageSequence.reduce(async (accPromise, m, index) => {
const acc = await accPromise;
@@ -76,11 +76,11 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
console.warn('conversationMessagesToAixGenerateRequest: unexpected User fragment part type', (uFragment.part as any).pt);
}
return uMsg;
}, Promise.resolve({ role: 'user', parts: [] } as AixChatMessageUser));
}, Promise.resolve({ role: 'user', parts: [] } as AixMessages_UserMessage));
// handle metadata on user messages
if (m.metadata?.inReplyToText)
aixChatMessageUser.parts.push(createAixMetaReplyToPart(m.metadata.inReplyToText));
aixChatMessageUser.parts.push(_clientCreateAixMetaReplyToPart(m.metadata.inReplyToText));
acc.chatSequence.push(aixChatMessageUser);
@@ -95,9 +95,10 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
switch (aFragment.part.pt) {
// intake.message.part = fragment.part
case 'text':
case 'tool_call':
// Key place where the Aix Zod inferred types are compared to the Typescript defined DMessagePart* types
// - in case of error, check that the types in `chat.fragments.ts` and `aix.wiretypes.ts` are in sync
mMsg.parts.push(aFragment.part);
break;
@@ -125,7 +126,7 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
}
return mMsg;
}, Promise.resolve({ role: 'model', parts: [] } as AixChatMessageModel));
}, Promise.resolve({ role: 'model', parts: [] } as AixMessages_ModelMessage));
acc.chatSequence.push(aixChatMessageModel);
@@ -135,10 +136,13 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
}
return acc;
}, Promise.resolve({ chatSequence: [] } as AixChatContentGenerateRequest));
}, Promise.resolve({ chatSequence: [] } as AixAPIChatGenerate_Request));
}
async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false) {
/// Parts that differ from DMessage*Part to AIX
async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false): Promise<AixParts_InlineImagePart> {
// validate
const { dataRef } = imageRefPart;
@@ -164,5 +168,13 @@ async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageR
}
}
return createAixInlineImagePart(base64Data, mimeType || dataRef.mimeType);
return _clientCreateAixInlineImagePart(base64Data, mimeType || dataRef.mimeType);
}
function _clientCreateAixInlineImagePart(base64: string, mimeType: string): AixParts_InlineImagePart {
return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixParts_InlineImagePart['mimeType'], base64 };
}
function _clientCreateAixMetaReplyToPart(replyTo: string): AixParts_MetaReplyToPart {
return { pt: 'meta_reply_to', replyTo };
}
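The comment in the model-message branch notes that DMessage parts are pushed into the Aix request as-is, so the hand-written types in `chat.fragments.ts` must stay structurally compatible with the Zod-inferred types in `aix.wiretypes.ts`. A hypothetical compile-time guard for that invariant (not part of the commit; it assumes the wire text part is `{ pt: 'text', text: string }`):

import type { DMessageTextPart } from '~/common/stores/chat/chat.fragments';
import type { AixMessages_UserMessage } from '../server/aix.wiretypes';

// If DMessageTextPart ever drifts from the wire part shape, this assignment stops compiling.
type AixUserPart = AixMessages_UserMessage['parts'][number];
const _textPart: DMessageTextPart = { pt: 'text', text: '' };
const _syncCheck: AixUserPart = _textPart;
void _syncCheck;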
+27 -32
@@ -4,9 +4,8 @@ import { findVendorForLlmOrThrow } from '~/modules/llms/vendors/vendors.registry
import { apiStream } from '~/common/util/trpc.client';
import type { Intake_Access, Intake_ContextChatStream, Intake_Model } from '../server/intake/schemas.intake.api';
import type { AixChatContentGenerateRequest } from './aix.client.api';
// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions
import type { AixAPI_Access, AixAPI_ContextChatStream, AixAPI_Model, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes';
export type StreamingClientUpdate = Partial<{
@@ -18,48 +17,44 @@ export type StreamingClientUpdate = Partial<{
export async function aixStreamingChatGenerate<TSourceSetup = unknown, TAccess extends ChatStreamingInputSchema['access'] = ChatStreamingInputSchema['access']>(
llmId: DLLMId,
chatGenerate: AixChatContentGenerateRequest,
intakeContextName: Intake_ContextChatStream['name'],
intakeContextRef: string,
aixChatGenerate: AixAPIChatGenerate_Request,
aixContextName: AixAPI_ContextChatStream['name'],
aixContextRef: AixAPI_ContextChatStream['ref'],
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
): Promise<void> {
// id to DLLM and vendor
// Aix Access
const { llm, vendor } = findVendorForLlmOrThrow<TSourceSetup, TAccess>(llmId);
// FIXME: relax the forced cast
// const llmOptions = llm.options;
const intakeModel = intakeModelFromLLMOptions(llm.options, llmId);
// get the access
const partialSourceSetup = llm._source.setup;
const intakeAccess = vendor.getTransportAccess(partialSourceSetup);
const aixAccess = vendor.getTransportAccess(partialSourceSetup);
// get any vendor-specific rate limit delay
// Aix Model - FIXME: relax the forced cast
// const llmOptions = llm.options;
const aixModel = _aixModelFromLLMOptions(llm.options, llmId);
// Aix Context
const aixContext = { method: 'chat-stream', name: aixContextName, ref: aixContextRef } as const;
// Simple rate limiting (vendor-specific)
const delay = vendor.getRateLimitDelay?.(llm, partialSourceSetup) ?? 0;
if (delay > 0)
await new Promise(resolve => setTimeout(resolve, delay));
// [OpenAI-only] check for harmful content with the free 'moderation' API, if the user requests so
// if (intakeAccess.dialect === 'openai' && intakeAccess.moderationCheck) {
// const moderationUpdate = await _openAIModerationCheck(intakeAccess, messages.at(-1) ?? null);
// if (aixAccess.dialect === 'openai' && aixAccess.moderationCheck) {
// const moderationUpdate = await _openAIModerationCheck(aixAccess, messages.at(-1) ?? null);
// if (moderationUpdate)
// return onUpdate({ textSoFar: moderationUpdate, typing: false }, true);
// }
// execute via the vendor
// return await vendor.streamingChatGenerateOrThrow(intakeAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
const intakeContext = intakeContextChatStream(intakeContextName, intakeContextRef);
return await _aixStreamGenerateUnified(intakeAccess, intakeModel, chatGenerate, intakeContext, abortSignal, onUpdate);
// return await vendor.streamingChatGenerateOrThrow(aixAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
return await _aixChatGenerateContent(aixAccess, aixModel, aixChatGenerate, aixContext, abortSignal, onUpdate);
}
function intakeContextChatStream(name: Intake_ContextChatStream['name'], ref: string): Intake_ContextChatStream {
return { method: 'chat-stream', name, ref };
}
function intakeModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId: string): Intake_Model {
function _aixModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId: string): AixAPI_Model {
// model params (llm)
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions || {};
if (!llmRef || llmTemperature === undefined)
@@ -82,19 +77,19 @@ function intakeModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId:
*
* NOTE: onUpdate is callback when a piece of a message (text, model name, typing..) is received
*/
async function _aixStreamGenerateUnified(
// input
access: Intake_Access,
model: Intake_Model,
chatGenerate: AixChatContentGenerateRequest,
context: Intake_ContextChatStream,
async function _aixChatGenerateContent(
// aix inputs
aixAccess: AixAPI_Access,
aixModel: AixAPI_Model,
aixChatGenerate: AixAPIChatGenerate_Request,
aixContext: AixAPI_ContextChatStream,
// others
abortSignal: AbortSignal,
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
): Promise<void> {
const operation = await apiStream.aix.chatGenerateContent.mutate(
{ access, model, chatGenerate, context, streaming: true, _debugRequestBody: false },
{ access: aixAccess, model: aixModel, chatGenerate: aixChatGenerate, context: aixContext, streaming: true, _debugRequestBody: false },
{ signal: abortSignal },
);
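End to end, the renamed client entry point now takes the Aix request plus a context name/ref and forwards everything to the `aix.chatGenerateContent` tRPC mutation. A hedged usage sketch (the llm id, message text and ref are placeholder values):

const abortController = new AbortController();
await aixStreamingChatGenerate(
  'openai-gpt-4o',                                                               // DLLMId (placeholder)
  { chatSequence: [{ role: 'user', parts: [{ pt: 'text', text: 'Hello' }] }] },  // AixAPIChatGenerate_Request
  'conversation',                                                                // AixAPI_ContextChatStream['name']
  'conversation-1234',                                                           // AixAPI_ContextChatStream['ref'] (placeholder)
  abortController.signal,
  (update, done) => console.log(update.textSoFar ?? '', done),
);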
+5 -5
@@ -4,10 +4,10 @@ import { createEmptyReadableStream, safeErrorString, serverCapitalizeFirstLetter
import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
import { fetchResponseOrTRPCThrow } from '~/server/api/trpc.router.fetchers';
import { AixWire_API, AixWire_API_ChatGenerate } from './aix.wiretypes';
import { IntakeHandler } from './intake/IntakeHandler';
import { createChatGenerateDispatch } from './dispatch/chatGenerate/chatGenerate.dispatch';
import { createStreamDemuxer } from './dispatch/stream.demuxers';
import { intake_Access_schema, intake_ChatGenerateRequest_schema, intake_ContextChatStream_schema, intake_Model_schema } from './intake/schemas.intake.api';
export const aixRouter = createTRPCRouter({
@@ -18,10 +18,10 @@ export const aixRouter = createTRPCRouter({
*/
chatGenerateContent: publicProcedure
.input(z.object({
access: intake_Access_schema,
model: intake_Model_schema,
chatGenerate: intake_ChatGenerateRequest_schema,
context: intake_ContextChatStream_schema,
access: AixWire_API.Access_schema,
model: AixWire_API.Model_schema,
chatGenerate: AixWire_API_ChatGenerate.Request_schema,
context: AixWire_API.Context_schema,
streaming: z.boolean(),
_debugRequestBody: z.boolean().optional(),
}))
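With the procedure input now composed from the `AixWire_*` namespaces, the request shape is defined once in Zod and inferred wherever a TypeScript type is needed. An illustrative sketch of deriving that input type (same composition as above):

import { z } from 'zod';
import { AixWire_API, AixWire_API_ChatGenerate } from './aix.wiretypes';

const chatGenerateInput_schema = z.object({
  access: AixWire_API.Access_schema,
  model: AixWire_API.Model_schema,
  chatGenerate: AixWire_API_ChatGenerate.Request_schema,
  context: AixWire_API.Context_schema,
  streaming: z.boolean(),
  _debugRequestBody: z.boolean().optional(),
});

// What the client-side mutate() call sends over the wire.
type ChatGenerateInput = z.infer<typeof chatGenerateInput_schema>;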
+38 -22
@@ -14,11 +14,22 @@ import { openAIAccessSchema } from '~/modules/llms/server/openai/openai.router';
// Export types
// export type IntakeWire_DocPart = z.infer<typeof IntakeWire_Parts.DocPart_schema>;
// export type IntakeWire_InlineImagePart = z.infer<typeof IntakeWire_Parts.InlineImagePart_schema>;
// export type IntakeWire_MetaReplyToPart = z.infer<typeof IntakeWire_Parts.MetaReplyToPart_schema>;
// export type IntakeWire_ChatMessage = z.infer<typeof IntakeWire_Messages.ChatMessage_schema>;
// export type IntakeWire_SystemMessage = z.infer<typeof IntakeWire_Messages.SystemMessage_schema>;
export type AixParts_DocPart = z.infer<typeof AixWire_Parts.DocPart_schema>;
export type AixParts_InlineImagePart = z.infer<typeof AixWire_Parts.InlineImagePart_schema>;
export type AixParts_MetaReplyToPart = z.infer<typeof AixWire_Parts.MetaReplyToPart_schema>;
export type AixMessages_SystemMessage = z.infer<typeof AixWire_Messages.SystemMessage_schema>;
export type AixMessages_UserMessage = z.infer<typeof AixWire_Messages.UserMessage_schema>;
export type AixMessages_ModelMessage = z.infer<typeof AixWire_Messages.ModelMessage_schema>;
export type AixMessages_ChatMessage = z.infer<typeof AixWire_Messages.ChatMessage_schema>;
export type AixTools_ToolDefinition = z.infer<typeof AixWire_Tools.Tool_schema>;
export type AixTools_ToolsPolicy = z.infer<typeof AixWire_Tools.ToolsPolicy_schema>;
export type AixAPI_Access = z.infer<typeof AixWire_API.Access_schema>;
export type AixAPI_ContextChatStream = z.infer<typeof AixWire_API.ContextChatStream_schema>;
export type AixAPI_Model = z.infer<typeof AixWire_API.Model_schema>;
export type AixAPIChatGenerate_Request = z.infer<typeof AixWire_API_ChatGenerate.Request_schema>;
export namespace OpenAPI_Schema {
@@ -137,10 +148,8 @@ export namespace AixWire_Parts {
const _CodeExecutionInvocation_schema = z.object({
type: z.literal('code_execution'),
variant: z.literal('gemini_auto_inline').optional(),
arguments: z.object({
code: z.string(),
language: z.string().optional(),
}),
language: z.string().optional(),
code: z.string(),
});
export const ToolInvocationPart_schema = z.object({
@@ -157,7 +166,7 @@ export namespace AixWire_Parts {
const _FunctionCallResponse_schema = z.object({
type: z.literal('function_call'),
result: z.string(),
// _name: z.string().optional(),
_name: z.string().optional(),
});
const _CodeExecutionResponse_schema = z.object({
@@ -196,7 +205,7 @@ export namespace AixWire_Messages {
/// Chat Message
const _UserMessage_schema = z.object({
export const UserMessage_schema = z.object({
role: z.literal('user'),
parts: z.array(z.discriminatedUnion('pt', [
AixWire_Parts.TextPart_schema,
@@ -206,7 +215,7 @@ export namespace AixWire_Messages {
])),
});
const _ModelMessage_schema = z.object({
export const ModelMessage_schema = z.object({
role: z.literal('model'),
parts: z.array(z.discriminatedUnion('pt', [
AixWire_Parts.TextPart_schema,
@@ -215,7 +224,7 @@ export namespace AixWire_Messages {
])),
});
const _ToolMessage_schema = z.object({
export const ToolMessage_schema = z.object({
role: z.literal('tool'),
parts: z.array(z.discriminatedUnion('pt', [
AixWire_Parts.ToolResponsePart_schema,
@@ -223,9 +232,9 @@ export namespace AixWire_Messages {
});
export const ChatMessage_schema = z.discriminatedUnion('role', [
_UserMessage_schema,
_ModelMessage_schema,
_ToolMessage_schema,
UserMessage_schema,
ModelMessage_schema,
ToolMessage_schema,
]);
}
@@ -339,6 +348,18 @@ export namespace AixWire_API {
maxTokens: z.number().min(1).max(1000000).optional(),
});
/// Context
export const ContextChatStream_schema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
export const Context_schema = z.discriminatedUnion('method', [
ContextChatStream_schema,
]);
}
export namespace AixWire_API_ChatGenerate {
@@ -352,12 +373,6 @@ export namespace AixWire_API_ChatGenerate {
toolsPolicy: AixWire_Tools.ToolsPolicy_schema.optional(),
});
export const Context_ChatStream_schema = z.object({
method: z.literal('chat-stream'),
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
ref: z.string(),
});
/// Response - Events Stream
// const AixEventProto_schema = z.union([
@@ -375,3 +390,4 @@ export namespace AixWire_API_ChatGenerate {
// });
}
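The chat-stream context schema moves from `AixWire_API_ChatGenerate` into `AixWire_API`, wrapped in a `Context_schema` discriminated union on `method`. A small validation sketch (the `ref` value is a placeholder):

const context = AixWire_API.Context_schema.parse({
  method: 'chat-stream',     // discriminator; currently the only variant
  name: 'conversation',      // one of the names enumerated above
  ref: 'conversation-1234',  // placeholder ref
});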
@@ -1,7 +1,5 @@
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
import type { Intake_ChatMessage } from '../../../intake/schemas.intake.messages';
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '~/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes';
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '../../wiretypes/anthropic.wiretypes';
// configuration
@@ -12,7 +10,7 @@ const hotFixMissingTokens = 4096; // [2024-07-12] max from https://docs.anthropi
type TRequest = AnthropicWire_API_Message_Create.Request;
export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): TRequest {
export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
// Convert the system message
const systemMessage: TRequest['system'] = chatGenerate.systemMessage?.parts.length
@@ -22,8 +20,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
// Transform the chat messages into Anthropic's format
const chatMessages: TRequest['messages'] = [];
let currentMessage: TRequest['messages'][number] | null = null;
for (const intakeChatMessage of chatGenerate.chatSequence) {
for (const { role, content } of _generateAnthropicMessagesContentBlocks(intakeChatMessage)) {
for (const aixMessage of chatGenerate.chatSequence) {
for (const { role, content } of _generateAnthropicMessagesContentBlocks(aixMessage)) {
if (!currentMessage || currentMessage.role !== role) {
if (currentMessage)
chatMessages.push(currentMessage);
@@ -41,8 +39,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
model: model.id,
system: systemMessage,
messages: chatMessages,
tools: chatGenerate.tools && _intakeToAnthropicTools(chatGenerate.tools),
tool_choice: chatGenerate.toolsPolicy && _intakeToAnthropicToolChoice(chatGenerate.toolsPolicy),
tools: chatGenerate.tools && _toAnthropicTools(chatGenerate.tools),
tool_choice: chatGenerate.toolsPolicy && _toAnthropicToolChoice(chatGenerate.toolsPolicy),
// metadata: { user_id: ... }
// stop_sequences: undefined,
stream: streaming,
@@ -60,7 +58,7 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
}
function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMessage): Generator<{
function* _generateAnthropicMessagesContentBlocks({ parts, role }: AixMessages_ChatMessage): Generator<{
role: 'user' | 'assistant',
content: TRequest['messages'][number]['content'][number]
}> {
@@ -75,21 +73,27 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
}
switch (role) {
case 'user':
for (const part of parts) {
switch (part.pt) {
case 'text':
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(part.text) };
break;
case 'inline_image':
yield { role: 'user', content: AnthropicWire_Blocks.ImageBlock(part.mimeType, part.base64) };
break;
case 'doc':
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock('```' + (part.ref || '') + '\n' + part.data.text + '\n```\n') };
break;
case 'meta_reply_to':
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(`<context>The user is referring to this in particular: ${part.replyTo}</context>`) };
break;
default:
throw new Error(`Unsupported part type in User message: ${(part as any).pt}`);
}
@@ -99,9 +103,11 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
case 'model':
for (const part of parts) {
switch (part.pt) {
case 'text':
yield { role: 'assistant', content: AnthropicWire_Blocks.TextBlock(part.text) };
break;
case 'inline_image':
// Example of mapping a model-generated image to a user message
if (hotFixMapModelImagesToUser) {
@@ -109,9 +115,22 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
} else
throw new Error('Model-generated images are not supported by Anthropic yet');
break;
case 'tool_call':
yield { role: 'assistant', content: AnthropicWire_Blocks.ToolUseBlock(part.id, part.name, part.args) };
let toolUseBlock;
switch (part.call.type) {
case 'function_call':
toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, part.call.name, part.call.args);
break;
case 'code_execution':
toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, 'execute_code' /* suboptimal */, part.call.code);
break;
default:
throw new Error(`Unsupported tool call type in Model message: ${(part.call as any).type}`);
}
yield { role: 'assistant', content: toolUseBlock };
break;
default:
throw new Error(`Unsupported part type in Model message: ${(part as any).pt}`);
}
@@ -121,10 +140,23 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
case 'tool':
for (const part of parts) {
switch (part.pt) {
case 'tool_response':
const responseContent = part.response ? [AnthropicWire_Blocks.TextBlock(part.response)] : [];
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, responseContent, part.isError) };
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
switch (part.response.type) {
case 'function_call':
const fcTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)];
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, fcTextParts, part.error ? true : undefined) };
break;
case 'code_execution':
const ceTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)];
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, ceTextParts, part.error ? true : undefined) };
break;
default:
throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`);
}
break;
default:
throw new Error(`Unsupported part type in Tool message: ${(part as any).pt}`);
}
@@ -133,9 +165,10 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
}
}
function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
function _toAnthropicTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
return itds.map(itd => {
switch (itd.type) {
case 'function_call':
const { name, description, input_schema } = itd.function_call;
return {
@@ -147,15 +180,15 @@ function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable<TRe
required: input_schema?.required,
},
};
case 'gemini_code_interpreter':
case 'code_execution':
throw new Error('Gemini code interpreter is not supported');
case 'preprocessor':
throw new Error('Preprocessors are not supported yet');
}
});
}
function _intakeToAnthropicToolChoice(itp: Intake_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
function _toAnthropicToolChoice(itp: AixTools_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
switch (itp.type) {
case 'auto':
return { type: 'auto' as const };
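The tool-response branches above inline the same error-prefix expression twice; extracted here as a tiny helper purely for readability (a sketch, not part of the commit):

function toolErrorPrefix(error?: boolean | string): string {
  return error ? (typeof error === 'string' ? `[ERROR] ${error} - ` : '[ERROR] ') : '';
}
// toolErrorPrefix('timeout') === '[ERROR] timeout - '
// toolErrorPrefix(true)      === '[ERROR] '
// toolErrorPrefix()          === ''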
@@ -1,7 +1,4 @@
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
import type { Intake_ChatMessage, Intake_DocPart } from '../../../intake/schemas.intake.messages';
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixParts_DocPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Messages, GeminiWire_Safety } from '../../wiretypes/gemini.wiretypes';
@@ -9,7 +6,7 @@ import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Me
const hotFixImagePartsFirst = true;
export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
export function aixToGeminiGenerateContent(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
// Note: the streaming setting is ignored as it only belongs in the path
@@ -19,13 +16,13 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate:
: undefined;
// Chat Messages
const contents: TRequest['contents'] = _intakeToGeminiContents(chatGenerate.chatSequence);
const contents: TRequest['contents'] = _toGeminiContents(chatGenerate.chatSequence);
// Construct the request payload
const payload: TRequest = {
contents,
tools: chatGenerate.tools && _intakeToGeminiTools(chatGenerate.tools),
toolConfig: chatGenerate.toolsPolicy && _intakeToGeminiToolConfig(chatGenerate.toolsPolicy),
tools: chatGenerate.tools && _toGeminiTools(chatGenerate.tools),
toolConfig: chatGenerate.toolsPolicy && _toGeminiToolConfig(chatGenerate.toolsPolicy),
safetySettings: _toGeminiSafetySettings(geminiSafetyThreshold),
systemInstruction,
generationConfig: {
@@ -51,7 +48,7 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate:
type TRequest = GeminiWire_API_Generate_Content.Request;
function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire_Messages.Content[] {
function _toGeminiContents(chatSequence: AixMessages_ChatMessage[]): GeminiWire_Messages.Content[] {
return chatSequence.map(message => {
const parts: GeminiWire_ContentParts.ContentPart[] = [];
@@ -83,12 +80,33 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire
break;
case 'tool_call':
parts.push(GeminiWire_ContentParts.FunctionCallPart(part.name, part.args));
switch (part.call.type) {
case 'function_call':
parts.push(GeminiWire_ContentParts.FunctionCallPart(part.call.name, part.call.args ?? undefined));
break;
case 'code_execution':
if (part.call.language?.toLowerCase() !== 'python')
console.warn('Gemini only supports Python code execution, but got:', part.call.language);
parts.push(GeminiWire_ContentParts.ExecutableCodePart('PYTHON', part.call.code));
break;
default:
throw new Error(`Unsupported tool call type in message: ${(part as any).call.type}`);
}
break;
case 'tool_response':
parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.name, { response: part.response, isError: part.isError }));
throw new Error('Tool responses are not supported yet - Gemini Expects Objects, but we have a string...');
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
switch (part.response.type) {
case 'function_call':
parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.response._name || part.id, toolErrorPrefix + part.response.result));
break;
case 'code_execution':
parts.push(GeminiWire_ContentParts.CodeExecutionResultPart(!part.error ? 'OUTCOME_OK' : 'OUTCOME_ERROR', toolErrorPrefix + part.response.result));
break;
default:
throw new Error(`Unsupported part type in message: ${(part as any).pt}`);
}
break;
default:
throw new Error(`Unsupported part type in message: ${(part as any).pt}`);
@@ -102,7 +120,7 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire
});
}
function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
function _toGeminiTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
const tools: TRequest['tools'] = [];
itds.forEach(itd => {
@@ -116,30 +134,29 @@ function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable<TReque
functionDeclarations: [{
name,
description,
parameters: {
type: 'object',
...input_schema,
},
parameters: { type: 'object', ...input_schema },
}],
});
break;
case 'gemini_code_interpreter':
case 'code_execution':
if (itd.variant !== 'gemini_auto_inline')
throw new Error('Gemini only supports inline code execution');
// throw if code execution is present more than once
if (tools.some(tool => tool.codeExecution))
throw new Error('Gemini code interpreter already defined');
tools.push({ codeExecution: {} });
break;
case 'preprocessor':
throw new Error('Preprocessors are not supported yet');
}
});
return tools;
}
function _intakeToGeminiToolConfig(itp: Intake_ToolsPolicy): NonNullable<TRequest['toolConfig']> {
function _toGeminiToolConfig(itp: AixTools_ToolsPolicy): NonNullable<TRequest['toolConfig']> {
switch (itp.type) {
case 'auto':
return { functionCallingConfig: { mode: 'AUTO' } };
@@ -167,8 +184,8 @@ function _toGeminiSafetySettings(threshold: GeminiWire_Safety.HarmBlockThreshold
// Approximate conversions - alternative approaches should be tried until we find the best one
function _toApproximateGeminiDocPart(intakeDocPart: Intake_DocPart): GeminiWire_ContentParts.ContentPart {
return GeminiWire_ContentParts.TextPart(`\`\`\`${intakeDocPart.ref || ''}\n${intakeDocPart.data.text}\n\`\`\`\n`);
function _toApproximateGeminiDocPart(aixPartsDocPart: AixParts_DocPart): GeminiWire_ContentParts.ContentPart {
return GeminiWire_ContentParts.TextPart(`\`\`\`${aixPartsDocPart.ref || ''}\n${aixPartsDocPart.data.text}\n\`\`\`\n`);
}
function _toApproximateGeminiReplyTo(replyTo: string): GeminiWire_ContentParts.ContentPart {
@@ -1,10 +1,7 @@
import { OpenAIDialects } from '~/modules/llms/server/openai/openai.router';
import type { OpenAIDialects } from '~/modules/llms/server/openai/openai.router';
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
import type { Intake_ChatMessage, Intake_SystemMessage } from '../../../intake/schemas.intake.messages';
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts } from '../../wiretypes/openai.wiretypes';
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixMessages_SystemMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts, OpenAIWire_Messages } from '../../wiretypes/openai.wiretypes';
//
@@ -29,7 +26,7 @@ const hotFixSquashTextSeparator = '\n\n\n---\n\n\n';
type TRequest = OpenAIWire_API_Chat_Completions.Request;
type TRequestMessages = TRequest['messages'];
export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, jsonOutput: boolean, streaming: boolean): TRequest {
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
// Dialect incompatibilities -> Hotfixes
const hotFixAlternateUserAssistantRoles = openAIDialect === 'perplexity';
@@ -44,7 +41,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
throw new Error('This service does not support function calls');
// Convert the chat messages to the OpenAI 4-Messages format
let chatMessages = _intakeToOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence);
let chatMessages = _toOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence);
// Apply hotfixes
if (hotFixSquashMultiPartText)
@@ -61,8 +58,8 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
let payload: TRequest = {
model: model.id,
messages: chatMessages,
tools: chatGenerate.tools && _intakeToOpenAITools(chatGenerate.tools),
tool_choice: chatGenerate.toolsPolicy && _intakeToOpenAIToolChoice(chatGenerate.toolsPolicy),
tools: chatGenerate.tools && _toOpenAITools(chatGenerate.tools),
tool_choice: chatGenerate.toolsPolicy && _toOpenAIToolChoice(chatGenerate.toolsPolicy),
parallel_tool_calls: undefined,
max_tokens: model.maxTokens !== undefined ? model.maxTokens : undefined,
temperature: model.temperature !== undefined ? model.temperature : undefined,
@@ -90,6 +87,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
return validated.data;
}
function _fixAlternateUserAssistantRoles(chatMessages: TRequestMessages): TRequestMessages {
return chatMessages.reduce((acc, historyItem) => {
@@ -159,7 +157,7 @@ function _fixSquashMultiPartText(chatMessages: TRequestMessages): TRequestMessag
}*/
function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined, chatSequence: Intake_ChatMessage[]): TRequestMessages {
function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | undefined, chatSequence: AixMessages_ChatMessage[]): TRequestMessages {
// Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages)
const chatMessages: TRequestMessages = [];
@@ -258,8 +256,17 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
// - otherwise we'll add an assistant message with null message
// create a new OpenAIWire_ToolCall (specialized to function)
const toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.name, JSON.stringify(part.args));
let toolCallPart;
switch (part.call.type) {
case 'function_call':
toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.call.name, part.call.args || '');
break;
case 'code_execution':
toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, 'execute_code' /* suboptimal */, part.call.code || '');
break;
default:
throw new Error(`Unsupported tool call type in Model message: ${(part as any).pt}`);
}
// Append to existing content[], or new message
if (currentMessage?.role === 'assistant') {
@@ -283,7 +290,11 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
switch (part.pt) {
case 'tool_response':
chatMessages.push({ role: 'tool', tool_call_id: part.id, content: part.isError ? '[ERROR]' + (part.response || '') : (part.response || '') });
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
if (part.response.type === 'function_call' || part.response.type === 'code_execution')
chatMessages.push(OpenAIWire_Messages.ToolMessage(part.id, toolErrorPrefix + part.response.result));
else
throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`);
break;
default:
@@ -297,10 +308,10 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
return chatMessages;
}
function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
function _toOpenAITools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
return itds.map(itd => {
switch (itd.type) {
case 'function_call':
const { name, description, input_schema } = itd.function_call;
return {
@@ -315,15 +326,15 @@ function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable<TReque
},
},
};
case 'gemini_code_interpreter':
case 'code_execution':
throw new Error('Gemini code interpreter is not supported');
case 'preprocessor':
throw new Error('Preprocessors are not supported yet');
}
});
}
function _intakeToOpenAIToolChoice(itp: Intake_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
function _toOpenAIToolChoice(itp: AixTools_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
// NOTE: OpenAI has an additional policy 'none', which we don't have as it behaves like passing no tools at all.
// Passing no tools is mandated instead of 'none'.
switch (itp.type) {
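Per the note above, Aix deliberately has no 'none' policy: callers omit `tools` instead. An assumed, partial sketch of how the remaining policies could map onto OpenAI's `tool_choice` values (the switch body is truncated in this hunk, and the policy variants are assumed to match those of the removed client API):

function assumedOpenAIToolChoice(itp: AixTools_ToolsPolicy) {
  switch (itp.type) {
    case 'auto':
      return 'auto' as const;
    case 'any':
      return 'required' as const; // the model must call some tool
    case 'function_call':
      return { type: 'function' as const, function: { name: itp.function_call.name } };
  }
}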
@@ -2,13 +2,13 @@ import { anthropicAccess } from '~/modules/llms/server/anthropic/anthropic.route
import { geminiAccess } from '~/modules/llms/server/gemini/gemini.router';
import { openAIAccess } from '~/modules/llms/server/openai/openai.router';
import type { Intake_Access, Intake_ChatGenerateRequest, Intake_Model } from '../../intake/schemas.intake.api';
import type { AixAPI_Access, AixAPI_Model, AixAPIChatGenerate_Request } from '../../aix.wiretypes';
import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes';
import { intakeToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
import { intakeToGeminiGenerateContent } from './adapters/gemini.generateContent';
import { intakeToOpenAIMessageCreate } from './adapters/openai.chatCompletions';
import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent';
import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions';
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
@@ -18,7 +18,7 @@ import type { ChatGenerateParseFunction } from './chatGenerate.types';
import type { StreamDemuxerFormat } from '../stream.demuxers';
export function createChatGenerateDispatch(access: Intake_Access, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): {
export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): {
request: { url: string, headers: HeadersInit, body: object },
demuxerFormat: StreamDemuxerFormat;
chatGenerateParse: ChatGenerateParseFunction;
@@ -29,7 +29,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
return {
request: {
...anthropicAccess(access, '/v1/messages'),
body: intakeToAnthropicMessageCreate(model, chatGenerate, streaming),
body: aixToAnthropicMessageCreate(model, chatGenerate, streaming),
},
demuxerFormat: streaming ? 'sse' : null,
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
@@ -39,7 +39,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
return {
request: {
...geminiAccess(access, model.id, streaming ? GeminiWire_API_Generate_Content.streamingPostPath : GeminiWire_API_Generate_Content.postPath),
body: intakeToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming),
body: aixToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming),
},
demuxerFormat: streaming ? 'sse' : null,
chatGenerateParse: createGeminiGenerateContentResponseParser(model.id),
@@ -70,7 +70,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
return {
request: {
...openAIAccess(access, model.id, '/v1/chat/completions'),
body: intakeToOpenAIMessageCreate(access.dialect, model, chatGenerate, false, streaming),
body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
},
demuxerFormat: streaming ? 'sse' : null,
chatGenerateParse: createOpenAIMessageCreateParser(),
@@ -64,14 +64,18 @@ export namespace GeminiWire_ContentParts {
export const FunctionCallPart_schema = z.object({
functionCall: z.object({
name: z.string(),
args: z.record(z.any()).optional(), // JSON object format
// NOTE: replacing with a single string for now, leaving serialization to the caller eventually
args: z.string().optional(),
// args: z.record(z.any()).optional(), // JSON object format
}),
});
const FunctionResponsePart_schema = z.object({
functionResponse: z.object({
name: z.string(),
response: z.record(z.any()), // Optional. JSON object format
// NOTE: replacing with a single string for now, leaving serialization to the caller eventually
response: z.string().optional(),
// response: z.record(z.any()), // Optional. JSON object format
}),
});
@@ -123,14 +127,22 @@ export namespace GeminiWire_ContentParts {
return { inlineData: { mimeType, data } };
}
export function FunctionCallPart(name: string, args?: Record<string, any>): z.infer<typeof FunctionCallPart_schema> {
export function FunctionCallPart(name: string, args?: string): z.infer<typeof FunctionCallPart_schema> {
return { functionCall: { name, args } };
}
export function FunctionResponsePart(name: string, response: Record<string, any>): z.infer<typeof FunctionResponsePart_schema> {
export function FunctionResponsePart(name: string, response?: string): z.infer<typeof FunctionResponsePart_schema> {
return { functionResponse: { name, response } };
}
export function ExecutableCodePart(language: 'PYTHON', code: string): z.infer<typeof unstable_ExecutableCodePart_schema> {
return { executableCode: { language, code } };
}
export function CodeExecutionResultPart(outcome: 'OUTCOME_OK' | 'OUTCOME_ERROR', output?: string): z.infer<typeof unstable_CodeExecutionResultPart_schema> {
return { codeExecutionResult: { outcome, output } };
}
}
export namespace GeminiWire_Messages {
@@ -185,7 +197,14 @@ export namespace GeminiWire_Tools {
* https://ai.google.dev/api/rest/v1beta/cachedContents#schema
* Here we relax the check.
*/
parameters: z.record(z.any()).optional(),
parameters: z.object({
type: z.literal('object'),
/**
* For stricter validation, use the OpenAPI_Schema.Object_schema
*/
properties: z.record(z.any()).optional(),
required: z.array(z.string()).optional(),
}),
});
export const Tool_schema = z.object({
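Function-call arguments and responses now travel as pre-serialized strings rather than JSON objects, leaving (de)serialization to the caller. A usage sketch of the updated builders (the names and values are placeholders):

const args = JSON.stringify({ city: 'Lisbon', units: 'metric' }); // the caller serializes
const callPart = GeminiWire_ContentParts.FunctionCallPart('get_weather', args);
const responsePart = GeminiWire_ContentParts.FunctionResponsePart('get_weather', '{"tempC":21}');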
@@ -1,7 +1,5 @@
import { z } from 'zod';
import { openAPI_SchemaObject_schema } from '../../intake/schemas.intake.tools';
//
// Implementation notes:
@@ -60,14 +58,14 @@ export namespace OpenAIWire_ContentParts {
}),
});
export const ToolCall_schema = z.discriminatedUnion('type', [
PredictedFunctionCall_schema,
]);
export function PredictedFunctionCall(toolCallId: string, functionName: string, functionArgs: string): z.infer<typeof PredictedFunctionCall_schema> {
return { type: 'function', id: toolCallId, function: { name: functionName, arguments: functionArgs } };
}
export const ToolCall_schema = z.discriminatedUnion('type', [
PredictedFunctionCall_schema,
]);
}
export namespace OpenAIWire_Messages {
@@ -107,6 +105,10 @@ export namespace OpenAIWire_Messages {
tool_call_id: z.string(),
});
export function ToolMessage(toolCallId: string, content: string): z.infer<typeof ToolMessage_schema> {
return { role: 'tool', content, tool_call_id: toolCallId };
}
export const Message_schema = z.discriminatedUnion('role', [
SystemMessage_schema,
UserMessage_schema,
@@ -138,16 +140,12 @@ export namespace OpenAIWire_Tools {
*/
parameters: z.object({
type: z.literal('object'),
properties: z.record(openAPI_SchemaObject_schema),
// Note: We commented out the code below in favor of the line above, because the OpenAPI 3.0.3 Schema object
// is in the 'Intake' API spec (.properties), and here we just need to pass that object upstream.
// properties: z.record(z.object({
// type: z.enum(['string', 'number', 'integer', 'boolean']),
// description: z.string().optional(),
// enum: z.array(z.string()).optional(),
// })),
/**
* For stricter validation, use the OpenAPI_Schema.Object_schema
*/
properties: z.record(z.any()).optional(),
required: z.array(z.string()).optional(),
}).optional(),
}),
});
export const ToolDefinition_schema = z.discriminatedUnion('type', [