mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
AIX: Massive updates
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
import type { DLLMId } from '~/modules/llms/store-llms';
|
||||
|
||||
import type { AixChatContentGenerateRequest } from '~/modules/aix/client/aix.client.api';
|
||||
import type { Intake_ContextChatStream } from '~/modules/aix/server/intake/schemas.intake.api';
|
||||
import type { AixAPI_ContextChatStream, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes';
|
||||
import { aixChatGenerateRequestFromDMessages } from '~/modules/aix/client/aix.client.fromDMessages.api';
|
||||
import { aixStreamingChatGenerate, StreamingClientUpdate } from '~/modules/aix/client/aix.client';
|
||||
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
|
||||
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
|
||||
import { conversationMessagesToAixGenerateRequest } from '~/modules/aix/client/aix.client.messages.api';
|
||||
|
||||
import type { DConversationId } from '~/common/stores/chat/chat.conversation';
|
||||
import { ConversationsManager } from '~/common/chats/ConversationsManager';
|
||||
@@ -54,7 +53,7 @@ export async function runPersonaOnConversationHead(
|
||||
|
||||
|
||||
// stream the assistant's messages directly to the state store
|
||||
const aixChatContentGenerateRequest = await conversationMessagesToAixGenerateRequest(history);
|
||||
const aixChatContentGenerateRequest = await aixChatGenerateRequestFromDMessages(history);
|
||||
const messageStatus = await llmGenerateContentStream(
|
||||
assistantLlmId,
|
||||
aixChatContentGenerateRequest,
|
||||
@@ -100,9 +99,9 @@ export type StreamMessageUpdate = Pick<DMessage, 'fragments' | 'originLLM' | 'pe
|
||||
|
||||
export async function llmGenerateContentStream(
|
||||
llmId: DLLMId,
|
||||
chatGenerate: AixChatContentGenerateRequest,
|
||||
intakeContextName: Intake_ContextChatStream['name'],
|
||||
intakeContextRef: string,
|
||||
aixChatGenerate: AixAPIChatGenerate_Request,
|
||||
aixContextName: AixAPI_ContextChatStream['name'],
|
||||
aixContextRef: AixAPI_ContextChatStream['ref'],
|
||||
parallelViewCount: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
|
||||
abortSignal: AbortSignal,
|
||||
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
|
||||
@@ -119,7 +118,7 @@ export async function llmGenerateContentStream(
|
||||
|
||||
try {
|
||||
|
||||
await aixStreamingChatGenerate(llmId, chatGenerate, intakeContextName, intakeContextRef, abortSignal,
|
||||
await aixStreamingChatGenerate(llmId, aixChatGenerate, aixContextName, aixContextRef, abortSignal,
|
||||
(update: StreamingClientUpdate, done: boolean) => {
|
||||
|
||||
// grow the incremental message
|
||||
|
||||
@@ -76,7 +76,7 @@ export type DMessageDocPart = { pt: 'doc', type: DMessageDocMimeType, data: DMes
|
||||
export type DMessageErrorPart = { pt: 'error', error: string };
|
||||
export type DMessageImageRefPart = { pt: 'image_ref', dataRef: DMessageDataRef, altText?: string, width?: number, height?: number };
|
||||
export type DMessageTextPart = { pt: 'text', text: string };
|
||||
export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution }; // Note that the definition of tools is in AIX.Intake
|
||||
export type DMessageToolInvocationPart = { pt: 'tool_call', id: string, call: DMessageToolInvocationFunctionCall | DMessageToolInvocationCodeExecution };
|
||||
export type DMessageToolResponsePart = { pt: 'tool_response', id: string, response: DMessageToolResponseFunctionCall | DMessageToolResponseCodeExecution, error?: boolean | string, _environment?: DMessageToolEnvironment };
|
||||
export type DMetaPlaceholderPart = { pt: 'ph', pText: string };
|
||||
type _DMetaSentinelPart = { pt: '_pt_sentinel' };
|
||||
@@ -114,7 +114,8 @@ type DMessageToolInvocationFunctionCall = {
|
||||
type DMessageToolInvocationCodeExecution = {
|
||||
type: 'code_execution';
|
||||
variant?: 'gemini_auto_inline';
|
||||
arguments: { code: string; language?: string; };
|
||||
language?: string;
|
||||
code: string;
|
||||
};
|
||||
|
||||
type DMessageToolEnvironment = 'upstream' | 'server' | 'client';
|
||||
@@ -250,7 +251,7 @@ function createDMessageFunctionCallInvocationPart(id: string, name: string, args
|
||||
}
|
||||
|
||||
function createDMessageCodeExecutionInvocationPart(id: string, code: string, language?: string, variant?: 'gemini_auto_inline'): DMessageToolInvocationPart {
|
||||
return { pt: 'tool_call', id, call: { type: 'code_execution', variant, arguments: { code, language } } };
|
||||
return { pt: 'tool_call', id, call: { type: 'code_execution', variant, language, code } };
|
||||
}
|
||||
|
||||
function createDMessageFunctionCallResponsePart(id: string, result: string, _name?: string, error?: boolean | string, _environment?: DMessageToolEnvironment): DMessageToolResponsePart {
|
||||
@@ -329,7 +330,7 @@ function _duplicatePart<TPart extends (DMessageContentFragment | DMessageAttachm
|
||||
if (call.type === 'function_call')
|
||||
return createDMessageFunctionCallInvocationPart(part.id, call.name, call.args, call._description, call._args_schema ? { ...call._args_schema } : undefined) as TPart;
|
||||
else
|
||||
return createDMessageCodeExecutionInvocationPart(part.id, call.arguments.code, call.arguments.language, call.variant) as TPart;
|
||||
return createDMessageCodeExecutionInvocationPart(part.id, call.code, call.language, call.variant) as TPart;
|
||||
|
||||
case 'tool_response':
|
||||
const response = part.response;
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
import type { DMessageDocPart, DMessageTextPart, DMessageToolInvocationPart, DMessageToolResponsePart } from '~/common/stores/chat/chat.fragments';
|
||||
|
||||
import type { Intake_InlineImagePart as AixInlineImagePart, Intake_MetaReplyToPart as AixMetaReplyToPart } from '../server/intake/schemas.intake.messages';
|
||||
|
||||
export type { AixInlineImagePart, AixMetaReplyToPart };
|
||||
|
||||
// Implementation note: we reuse the 'Parts' from the chat module, and slightly extend/modify it.
|
||||
// However we may switch to the 'Fragments' concept if we need to uplevel.
|
||||
//
|
||||
// This is only the interface between the application and the client (there's no data storage) so
|
||||
// we can change it without any migration. The client will then issue the server intake request,
|
||||
// which must match exactly this format.
|
||||
//
|
||||
// Chat/Other -- (this schema) --> Client -- (intake request) --> Server -- (dispatch request) --> AI Service
|
||||
//
|
||||
// ** MUST Keep the __intake__ schemas in sync **
|
||||
//
|
||||
|
||||
// Chat Content Generation - Request Schema
|
||||
|
||||
export interface AixChatContentGenerateRequest {
|
||||
systemMessage?: AixSystemMessage;
|
||||
chatSequence: AixChatMessage[];
|
||||
tools?: AixToolDefinition[];
|
||||
toolPolicy?: AixToolPolicy;
|
||||
}
|
||||
|
||||
// export interface GenerationParameters {
|
||||
// model?: string;
|
||||
// temperature?: number;
|
||||
// maxTokens?: number;
|
||||
// }
|
||||
|
||||
|
||||
// System Message Schema
|
||||
|
||||
interface AixSystemMessage {
|
||||
parts: DMessageTextPart[];
|
||||
}
|
||||
|
||||
|
||||
// Input Message Schema
|
||||
|
||||
export type AixChatMessage =
|
||||
| AixChatMessageUser
|
||||
| AixChatMessageModel
|
||||
| AixChatMessageTool;
|
||||
|
||||
export interface AixChatMessageUser {
|
||||
role: 'user',
|
||||
parts: (DMessageTextPart | AixInlineImagePart | DMessageDocPart | AixMetaReplyToPart)[];
|
||||
}
|
||||
|
||||
export interface AixChatMessageModel {
|
||||
role: 'model',
|
||||
parts: (DMessageTextPart | AixInlineImagePart | DMessageToolInvocationPart)[];
|
||||
}
|
||||
|
||||
interface AixChatMessageTool {
|
||||
role: 'tool',
|
||||
parts: DMessageToolResponsePart[];
|
||||
}
|
||||
|
||||
export function createAixInlineImagePart(base64: string, mimeType: string): AixInlineImagePart {
|
||||
return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixInlineImagePart['mimeType'], base64 };
|
||||
}
|
||||
|
||||
export function createAixMetaReplyToPart(replyTo: string): AixMetaReplyToPart {
|
||||
return { pt: 'meta_reply_to', replyTo };
|
||||
}
|
||||
|
||||
|
||||
// AIX Tools
|
||||
|
||||
export type AixToolDefinition =
|
||||
| AixToolFunctionCallDefinition
|
||||
| AixToolGeminiCodeInterpreter
|
||||
| AixToolPreprocessor;
|
||||
|
||||
export type AixToolPolicy =
|
||||
| { type: 'any' }
|
||||
| { type: 'auto' }
|
||||
| { type: 'function_call'; function_call: { name: string } }
|
||||
| { type: 'none' };
|
||||
|
||||
|
||||
// AIX Tools > Function Call
|
||||
|
||||
interface AixToolFunctionCallDefinition {
|
||||
type: 'function_call';
|
||||
function_call: AixFunctionCall;
|
||||
}
|
||||
|
||||
interface AixFunctionCall {
|
||||
/**
|
||||
* The name of the function to call. Up to 64 characters long, and can only contain letters, numbers, underscores, and hyphens.
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
* 3-4 sentences. Detailed description of what the tool does, when it should be used (and when not), what each parameter means, caveats and limitations.
|
||||
*/
|
||||
description: string;
|
||||
/**
|
||||
* A JSON Schema object defining the expected parameters for the function call.
|
||||
* (OpenAI,Google: parameters, Anthropic: input_schema)
|
||||
*/
|
||||
input_schema?: {
|
||||
properties: Record<string, OpenAPISchemaObject>;
|
||||
required?: string[];
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* The TypeScript definition of an "OpenAPI 3.0.3" "Schema Object".
|
||||
* This is a subset of the OpenAPI Schema Object, focused on function calling use cases.
|
||||
*/
|
||||
interface OpenAPISchemaObject {
|
||||
type: 'string' | 'number' | 'integer' | 'boolean' | 'array' | 'object';
|
||||
description?: string;
|
||||
nullable?: boolean;
|
||||
enum?: unknown[]; // Changed from any[] to unknown[] for better type safety
|
||||
format?: string;
|
||||
properties?: Record<string, OpenAPISchemaObject>;
|
||||
required?: string[];
|
||||
items?: OpenAPISchemaObject;
|
||||
}
|
||||
|
||||
|
||||
// AIX Tools > Gemini Code Interpreter
|
||||
|
||||
interface AixToolGeminiCodeInterpreter {
|
||||
type: 'gemini_code_interpreter';
|
||||
}
|
||||
|
||||
// AIX Tools > Preprocessor
|
||||
|
||||
interface AixToolPreprocessor {
|
||||
type: 'preprocessor';
|
||||
pname: 'anthropic_artifacts';
|
||||
}
|
||||
+22
-10
@@ -4,8 +4,8 @@ import type { DMessage } from '~/common/stores/chat/chat.message';
|
||||
import { DMessageImageRefPart, isContentFragment, isContentOrAttachmentFragment, isTextPart } from '~/common/stores/chat/chat.fragments';
|
||||
import { LLMImageResizeMode, resizeBase64ImageIfNeeded } from '~/common/util/imageUtils';
|
||||
|
||||
import { AixChatContentGenerateRequest, AixChatMessageModel, AixChatMessageUser, createAixInlineImagePart, createAixMetaReplyToPart } from './aix.client.api';
|
||||
|
||||
// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions
|
||||
import type { AixAPIChatGenerate_Request, AixMessages_ModelMessage, AixMessages_UserMessage, AixParts_InlineImagePart, AixParts_MetaReplyToPart } from '../server/aix.wiretypes';
|
||||
|
||||
// TODO: remove console messages to zero, or replace with throws or something
|
||||
|
||||
@@ -19,7 +19,7 @@ export const MODEL_IMAGE_RESCALE_QUALITY = 0.90;
|
||||
// AIX <> Chat Messages API helpers
|
||||
//
|
||||
|
||||
export async function conversationMessagesToAixGenerateRequest(messageSequence: Readonly<DMessage[]>): Promise<AixChatContentGenerateRequest> {
|
||||
export async function aixChatGenerateRequestFromDMessages(messageSequence: Readonly<DMessage[]>): Promise<AixAPIChatGenerate_Request> {
|
||||
// reduce history
|
||||
return await messageSequence.reduce(async (accPromise, m, index) => {
|
||||
const acc = await accPromise;
|
||||
@@ -76,11 +76,11 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
|
||||
console.warn('conversationMessagesToAixGenerateRequest: unexpected User fragment part type', (uFragment.part as any).pt);
|
||||
}
|
||||
return uMsg;
|
||||
}, Promise.resolve({ role: 'user', parts: [] } as AixChatMessageUser));
|
||||
}, Promise.resolve({ role: 'user', parts: [] } as AixMessages_UserMessage));
|
||||
|
||||
// handle metadata on user messages
|
||||
if (m.metadata?.inReplyToText)
|
||||
aixChatMessageUser.parts.push(createAixMetaReplyToPart(m.metadata.inReplyToText));
|
||||
aixChatMessageUser.parts.push(_clientCreateAixMetaReplyToPart(m.metadata.inReplyToText));
|
||||
|
||||
acc.chatSequence.push(aixChatMessageUser);
|
||||
|
||||
@@ -95,9 +95,10 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
|
||||
|
||||
switch (aFragment.part.pt) {
|
||||
|
||||
// intake.message.part = fragment.part
|
||||
case 'text':
|
||||
case 'tool_call':
|
||||
// Key place where the Aix Zod inferred types are compared to the Typescript defined DMessagePart* types
|
||||
// - in case of error, check that the types in `chat.fragments.ts` and `aix.wiretypes.ts` are in sync
|
||||
mMsg.parts.push(aFragment.part);
|
||||
break;
|
||||
|
||||
@@ -125,7 +126,7 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
|
||||
|
||||
}
|
||||
return mMsg;
|
||||
}, Promise.resolve({ role: 'model', parts: [] } as AixChatMessageModel));
|
||||
}, Promise.resolve({ role: 'model', parts: [] } as AixMessages_ModelMessage));
|
||||
|
||||
acc.chatSequence.push(aixChatMessageModel);
|
||||
|
||||
@@ -135,10 +136,13 @@ export async function conversationMessagesToAixGenerateRequest(messageSequence:
|
||||
}
|
||||
|
||||
return acc;
|
||||
}, Promise.resolve({ chatSequence: [] } as AixChatContentGenerateRequest));
|
||||
}, Promise.resolve({ chatSequence: [] } as AixAPIChatGenerate_Request));
|
||||
}
|
||||
|
||||
async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false) {
|
||||
|
||||
/// Parts that differ from DMessage*Part to AIX
|
||||
|
||||
async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false): Promise<AixParts_InlineImagePart> {
|
||||
|
||||
// validate
|
||||
const { dataRef } = imageRefPart;
|
||||
@@ -164,5 +168,13 @@ async function _convertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageR
|
||||
}
|
||||
}
|
||||
|
||||
return createAixInlineImagePart(base64Data, mimeType || dataRef.mimeType);
|
||||
return _clientCreateAixInlineImagePart(base64Data, mimeType || dataRef.mimeType);
|
||||
}
|
||||
|
||||
function _clientCreateAixInlineImagePart(base64: string, mimeType: string): AixParts_InlineImagePart {
|
||||
return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixParts_InlineImagePart['mimeType'], base64 };
|
||||
}
|
||||
|
||||
function _clientCreateAixMetaReplyToPart(replyTo: string): AixParts_MetaReplyToPart {
|
||||
return { pt: 'meta_reply_to', replyTo };
|
||||
}
|
||||
@@ -4,9 +4,8 @@ import { findVendorForLlmOrThrow } from '~/modules/llms/vendors/vendors.registry
|
||||
|
||||
import { apiStream } from '~/common/util/trpc.client';
|
||||
|
||||
import type { Intake_Access, Intake_ContextChatStream, Intake_Model } from '../server/intake/schemas.intake.api';
|
||||
|
||||
import type { AixChatContentGenerateRequest } from './aix.client.api';
|
||||
// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions
|
||||
import type { AixAPI_Access, AixAPI_ContextChatStream, AixAPI_Model, AixAPIChatGenerate_Request } from '~/modules/aix/server/aix.wiretypes';
|
||||
|
||||
|
||||
export type StreamingClientUpdate = Partial<{
|
||||
@@ -18,48 +17,44 @@ export type StreamingClientUpdate = Partial<{
|
||||
|
||||
export async function aixStreamingChatGenerate<TSourceSetup = unknown, TAccess extends ChatStreamingInputSchema['access'] = ChatStreamingInputSchema['access']>(
|
||||
llmId: DLLMId,
|
||||
chatGenerate: AixChatContentGenerateRequest,
|
||||
intakeContextName: Intake_ContextChatStream['name'],
|
||||
intakeContextRef: string,
|
||||
aixChatGenerate: AixAPIChatGenerate_Request,
|
||||
aixContextName: AixAPI_ContextChatStream['name'],
|
||||
aixContextRef: AixAPI_ContextChatStream['ref'],
|
||||
abortSignal: AbortSignal,
|
||||
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
|
||||
): Promise<void> {
|
||||
|
||||
// id to DLLM and vendor
|
||||
// Aix Access
|
||||
const { llm, vendor } = findVendorForLlmOrThrow<TSourceSetup, TAccess>(llmId);
|
||||
|
||||
// FIXME: relax the forced cast
|
||||
// const llmOptions = llm.options;
|
||||
const intakeModel = intakeModelFromLLMOptions(llm.options, llmId);
|
||||
|
||||
// get the access
|
||||
const partialSourceSetup = llm._source.setup;
|
||||
const intakeAccess = vendor.getTransportAccess(partialSourceSetup);
|
||||
const aixAccess = vendor.getTransportAccess(partialSourceSetup);
|
||||
|
||||
// get any vendor-specific rate limit delay
|
||||
// Aix Model - FIXME: relax the forced cast
|
||||
// const llmOptions = llm.options;
|
||||
const aixModel = _aixModelFromLLMOptions(llm.options, llmId);
|
||||
|
||||
// Aix Context
|
||||
const aixContext = { method: 'chat-stream', name: aixContextName, ref: aixContextRef } as const;
|
||||
|
||||
// Simple rate limiting (vendor-specific)
|
||||
const delay = vendor.getRateLimitDelay?.(llm, partialSourceSetup) ?? 0;
|
||||
if (delay > 0)
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
|
||||
// [OpenAI-only] check for harmful content with the free 'moderation' API, if the user requests so
|
||||
// if (intakeAccess.dialect === 'openai' && intakeAccess.moderationCheck) {
|
||||
// const moderationUpdate = await _openAIModerationCheck(intakeAccess, messages.at(-1) ?? null);
|
||||
// if (aixAccess.dialect === 'openai' && aixAccess.moderationCheck) {
|
||||
// const moderationUpdate = await _openAIModerationCheck(aixAccess, messages.at(-1) ?? null);
|
||||
// if (moderationUpdate)
|
||||
// return onUpdate({ textSoFar: moderationUpdate, typing: false }, true);
|
||||
// }
|
||||
|
||||
|
||||
// execute via the vendor
|
||||
// return await vendor.streamingChatGenerateOrThrow(intakeAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
|
||||
const intakeContext = intakeContextChatStream(intakeContextName, intakeContextRef);
|
||||
return await _aixStreamGenerateUnified(intakeAccess, intakeModel, chatGenerate, intakeContext, abortSignal, onUpdate);
|
||||
// return await vendor.streamingChatGenerateOrThrow(aixAccess, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate);
|
||||
return await _aixChatGenerateContent(aixAccess, aixModel, aixChatGenerate, aixContext, abortSignal, onUpdate);
|
||||
}
|
||||
|
||||
function intakeContextChatStream(name: Intake_ContextChatStream['name'], ref: string): Intake_ContextChatStream {
|
||||
return { method: 'chat-stream', name, ref };
|
||||
}
|
||||
|
||||
function intakeModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId: string): Intake_Model {
|
||||
function _aixModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId: string): AixAPI_Model {
|
||||
// model params (llm)
|
||||
const { llmRef, llmTemperature, llmResponseTokens } = llmOptions || {};
|
||||
if (!llmRef || llmTemperature === undefined)
|
||||
@@ -82,19 +77,19 @@ function intakeModelFromLLMOptions(llmOptions: Record<string, any>, debugLlmId:
|
||||
*
|
||||
* NOTE: onUpdate is callback when a piece of a message (text, model name, typing..) is received
|
||||
*/
|
||||
async function _aixStreamGenerateUnified(
|
||||
// input
|
||||
access: Intake_Access,
|
||||
model: Intake_Model,
|
||||
chatGenerate: AixChatContentGenerateRequest,
|
||||
context: Intake_ContextChatStream,
|
||||
async function _aixChatGenerateContent(
|
||||
// aix inputs
|
||||
aixAccess: AixAPI_Access,
|
||||
aixModel: AixAPI_Model,
|
||||
aixChatGenerate: AixAPIChatGenerate_Request,
|
||||
aixContext: AixAPI_ContextChatStream,
|
||||
// others
|
||||
abortSignal: AbortSignal,
|
||||
onUpdate: (update: StreamingClientUpdate, done: boolean) => void,
|
||||
): Promise<void> {
|
||||
|
||||
const operation = await apiStream.aix.chatGenerateContent.mutate(
|
||||
{ access, model, chatGenerate, context, streaming: true, _debugRequestBody: false },
|
||||
{ access: aixAccess, model: aixModel, chatGenerate: aixChatGenerate, context: aixContext, streaming: true, _debugRequestBody: false },
|
||||
{ signal: abortSignal },
|
||||
);
|
||||
|
||||
|
||||
@@ -4,10 +4,10 @@ import { createEmptyReadableStream, safeErrorString, serverCapitalizeFirstLetter
|
||||
import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
|
||||
import { fetchResponseOrTRPCThrow } from '~/server/api/trpc.router.fetchers';
|
||||
|
||||
import { AixWire_API, AixWire_API_ChatGenerate } from './aix.wiretypes';
|
||||
import { IntakeHandler } from './intake/IntakeHandler';
|
||||
import { createChatGenerateDispatch } from './dispatch/chatGenerate/chatGenerate.dispatch';
|
||||
import { createStreamDemuxer } from './dispatch/stream.demuxers';
|
||||
import { intake_Access_schema, intake_ChatGenerateRequest_schema, intake_ContextChatStream_schema, intake_Model_schema } from './intake/schemas.intake.api';
|
||||
|
||||
|
||||
export const aixRouter = createTRPCRouter({
|
||||
@@ -18,10 +18,10 @@ export const aixRouter = createTRPCRouter({
|
||||
*/
|
||||
chatGenerateContent: publicProcedure
|
||||
.input(z.object({
|
||||
access: intake_Access_schema,
|
||||
model: intake_Model_schema,
|
||||
chatGenerate: intake_ChatGenerateRequest_schema,
|
||||
context: intake_ContextChatStream_schema,
|
||||
access: AixWire_API.Access_schema,
|
||||
model: AixWire_API.Model_schema,
|
||||
chatGenerate: AixWire_API_ChatGenerate.Request_schema,
|
||||
context: AixWire_API.Context_schema,
|
||||
streaming: z.boolean(),
|
||||
_debugRequestBody: z.boolean().optional(),
|
||||
}))
|
||||
|
||||
@@ -14,11 +14,22 @@ import { openAIAccessSchema } from '~/modules/llms/server/openai/openai.router';
|
||||
|
||||
|
||||
// Export types
|
||||
// export type IntakeWire_DocPart = z.infer<typeof IntakeWire_Parts.DocPart_schema>;
|
||||
// export type IntakeWire_InlineImagePart = z.infer<typeof IntakeWire_Parts.InlineImagePart_schema>;
|
||||
// export type IntakeWire_MetaReplyToPart = z.infer<typeof IntakeWire_Parts.MetaReplyToPart_schema>;
|
||||
// export type IntakeWire_ChatMessage = z.infer<typeof IntakeWire_Messages.ChatMessage_schema>;
|
||||
// export type IntakeWire_SystemMessage = z.infer<typeof IntakeWire_Messages.SystemMessage_schema>;
|
||||
export type AixParts_DocPart = z.infer<typeof AixWire_Parts.DocPart_schema>;
|
||||
export type AixParts_InlineImagePart = z.infer<typeof AixWire_Parts.InlineImagePart_schema>;
|
||||
export type AixParts_MetaReplyToPart = z.infer<typeof AixWire_Parts.MetaReplyToPart_schema>;
|
||||
|
||||
export type AixMessages_SystemMessage = z.infer<typeof AixWire_Messages.SystemMessage_schema>;
|
||||
export type AixMessages_UserMessage = z.infer<typeof AixWire_Messages.UserMessage_schema>;
|
||||
export type AixMessages_ModelMessage = z.infer<typeof AixWire_Messages.ModelMessage_schema>;
|
||||
export type AixMessages_ChatMessage = z.infer<typeof AixWire_Messages.ChatMessage_schema>;
|
||||
|
||||
export type AixTools_ToolDefinition = z.infer<typeof AixWire_Tools.Tool_schema>;
|
||||
export type AixTools_ToolsPolicy = z.infer<typeof AixWire_Tools.ToolsPolicy_schema>;
|
||||
|
||||
export type AixAPI_Access = z.infer<typeof AixWire_API.Access_schema>;
|
||||
export type AixAPI_ContextChatStream = z.infer<typeof AixWire_API.ContextChatStream_schema>;
|
||||
export type AixAPI_Model = z.infer<typeof AixWire_API.Model_schema>;
|
||||
export type AixAPIChatGenerate_Request = z.infer<typeof AixWire_API_ChatGenerate.Request_schema>;
|
||||
|
||||
|
||||
export namespace OpenAPI_Schema {
|
||||
@@ -137,10 +148,8 @@ export namespace AixWire_Parts {
|
||||
const _CodeExecutionInvocation_schema = z.object({
|
||||
type: z.literal('code_execution'),
|
||||
variant: z.literal('gemini_auto_inline').optional(),
|
||||
arguments: z.object({
|
||||
code: z.string(),
|
||||
language: z.string().optional(),
|
||||
}),
|
||||
language: z.string().optional(),
|
||||
code: z.string(),
|
||||
});
|
||||
|
||||
export const ToolInvocationPart_schema = z.object({
|
||||
@@ -157,7 +166,7 @@ export namespace AixWire_Parts {
|
||||
const _FunctionCallResponse_schema = z.object({
|
||||
type: z.literal('function_call'),
|
||||
result: z.string(),
|
||||
// _name: z.string().optional(),
|
||||
_name: z.string().optional(),
|
||||
});
|
||||
|
||||
const _CodeExecutionResponse_schema = z.object({
|
||||
@@ -196,7 +205,7 @@ export namespace AixWire_Messages {
|
||||
|
||||
/// Chat Message
|
||||
|
||||
const _UserMessage_schema = z.object({
|
||||
export const UserMessage_schema = z.object({
|
||||
role: z.literal('user'),
|
||||
parts: z.array(z.discriminatedUnion('pt', [
|
||||
AixWire_Parts.TextPart_schema,
|
||||
@@ -206,7 +215,7 @@ export namespace AixWire_Messages {
|
||||
])),
|
||||
});
|
||||
|
||||
const _ModelMessage_schema = z.object({
|
||||
export const ModelMessage_schema = z.object({
|
||||
role: z.literal('model'),
|
||||
parts: z.array(z.discriminatedUnion('pt', [
|
||||
AixWire_Parts.TextPart_schema,
|
||||
@@ -215,7 +224,7 @@ export namespace AixWire_Messages {
|
||||
])),
|
||||
});
|
||||
|
||||
const _ToolMessage_schema = z.object({
|
||||
export const ToolMessage_schema = z.object({
|
||||
role: z.literal('tool'),
|
||||
parts: z.array(z.discriminatedUnion('pt', [
|
||||
AixWire_Parts.ToolResponsePart_schema,
|
||||
@@ -223,9 +232,9 @@ export namespace AixWire_Messages {
|
||||
});
|
||||
|
||||
export const ChatMessage_schema = z.discriminatedUnion('role', [
|
||||
_UserMessage_schema,
|
||||
_ModelMessage_schema,
|
||||
_ToolMessage_schema,
|
||||
UserMessage_schema,
|
||||
ModelMessage_schema,
|
||||
ToolMessage_schema,
|
||||
]);
|
||||
|
||||
}
|
||||
@@ -339,6 +348,18 @@ export namespace AixWire_API {
|
||||
maxTokens: z.number().min(1).max(1000000).optional(),
|
||||
});
|
||||
|
||||
/// Context
|
||||
|
||||
export const ContextChatStream_schema = z.object({
|
||||
method: z.literal('chat-stream'),
|
||||
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
|
||||
ref: z.string(),
|
||||
});
|
||||
|
||||
export const Context_schema = z.discriminatedUnion('method', [
|
||||
ContextChatStream_schema,
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
export namespace AixWire_API_ChatGenerate {
|
||||
@@ -352,12 +373,6 @@ export namespace AixWire_API_ChatGenerate {
|
||||
toolsPolicy: AixWire_Tools.ToolsPolicy_schema.optional(),
|
||||
});
|
||||
|
||||
export const Context_ChatStream_schema = z.object({
|
||||
method: z.literal('chat-stream'),
|
||||
name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
|
||||
ref: z.string(),
|
||||
});
|
||||
|
||||
/// Response - Events Stream
|
||||
|
||||
// const AixEventProto_schema = z.union([
|
||||
@@ -375,3 +390,4 @@ export namespace AixWire_API_ChatGenerate {
|
||||
// });
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
|
||||
import type { Intake_ChatMessage } from '../../../intake/schemas.intake.messages';
|
||||
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
|
||||
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '~/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes';
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
|
||||
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '../../wiretypes/anthropic.wiretypes';
|
||||
|
||||
|
||||
// configuration
|
||||
@@ -12,7 +10,7 @@ const hotFixMissingTokens = 4096; // [2024-07-12] max from https://docs.anthropi
|
||||
|
||||
type TRequest = AnthropicWire_API_Message_Create.Request;
|
||||
|
||||
export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): TRequest {
|
||||
export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
|
||||
|
||||
// Convert the system message
|
||||
const systemMessage: TRequest['system'] = chatGenerate.systemMessage?.parts.length
|
||||
@@ -22,8 +20,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
|
||||
// Transform the chat messages into Anthropic's format
|
||||
const chatMessages: TRequest['messages'] = [];
|
||||
let currentMessage: TRequest['messages'][number] | null = null;
|
||||
for (const intakeChatMessage of chatGenerate.chatSequence) {
|
||||
for (const { role, content } of _generateAnthropicMessagesContentBlocks(intakeChatMessage)) {
|
||||
for (const aixMessage of chatGenerate.chatSequence) {
|
||||
for (const { role, content } of _generateAnthropicMessagesContentBlocks(aixMessage)) {
|
||||
if (!currentMessage || currentMessage.role !== role) {
|
||||
if (currentMessage)
|
||||
chatMessages.push(currentMessage);
|
||||
@@ -41,8 +39,8 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
|
||||
model: model.id,
|
||||
system: systemMessage,
|
||||
messages: chatMessages,
|
||||
tools: chatGenerate.tools && _intakeToAnthropicTools(chatGenerate.tools),
|
||||
tool_choice: chatGenerate.toolsPolicy && _intakeToAnthropicToolChoice(chatGenerate.toolsPolicy),
|
||||
tools: chatGenerate.tools && _toAnthropicTools(chatGenerate.tools),
|
||||
tool_choice: chatGenerate.toolsPolicy && _toAnthropicToolChoice(chatGenerate.toolsPolicy),
|
||||
// metadata: { user_id: ... }
|
||||
// stop_sequences: undefined,
|
||||
stream: streaming,
|
||||
@@ -60,7 +58,7 @@ export function intakeToAnthropicMessageCreate(model: Intake_Model, chatGenerate
|
||||
}
|
||||
|
||||
|
||||
function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMessage): Generator<{
|
||||
function* _generateAnthropicMessagesContentBlocks({ parts, role }: AixMessages_ChatMessage): Generator<{
|
||||
role: 'user' | 'assistant',
|
||||
content: TRequest['messages'][number]['content'][number]
|
||||
}> {
|
||||
@@ -75,21 +73,27 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
|
||||
}
|
||||
|
||||
switch (role) {
|
||||
|
||||
case 'user':
|
||||
for (const part of parts) {
|
||||
switch (part.pt) {
|
||||
|
||||
case 'text':
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(part.text) };
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.ImageBlock(part.mimeType, part.base64) };
|
||||
break;
|
||||
|
||||
case 'doc':
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock('```' + (part.ref || '') + '\n' + part.data.text + '\n```\n') };
|
||||
break;
|
||||
|
||||
case 'meta_reply_to':
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.TextBlock(`<context>The user is referring to this in particular: ${part.replyTo}</context>`) };
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported part type in User message: ${(part as any).pt}`);
|
||||
}
|
||||
@@ -99,9 +103,11 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
|
||||
case 'model':
|
||||
for (const part of parts) {
|
||||
switch (part.pt) {
|
||||
|
||||
case 'text':
|
||||
yield { role: 'assistant', content: AnthropicWire_Blocks.TextBlock(part.text) };
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
// Example of mapping a model-generated image to a user message
|
||||
if (hotFixMapModelImagesToUser) {
|
||||
@@ -109,9 +115,22 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
|
||||
} else
|
||||
throw new Error('Model-generated images are not supported by Anthropic yet');
|
||||
break;
|
||||
|
||||
case 'tool_call':
|
||||
yield { role: 'assistant', content: AnthropicWire_Blocks.ToolUseBlock(part.id, part.name, part.args) };
|
||||
let toolUseBlock;
|
||||
switch (part.call.type) {
|
||||
case 'function_call':
|
||||
toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, part.call.name, part.call.args);
|
||||
break;
|
||||
case 'code_execution':
|
||||
toolUseBlock = AnthropicWire_Blocks.ToolUseBlock(part.id, 'execute_code' /* suboptimal */, part.call.code);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported tool call type in Model message: ${(part.call as any).type}`);
|
||||
}
|
||||
yield { role: 'assistant', content: toolUseBlock };
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported part type in Model message: ${(part as any).pt}`);
|
||||
}
|
||||
@@ -121,10 +140,23 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
|
||||
case 'tool':
|
||||
for (const part of parts) {
|
||||
switch (part.pt) {
|
||||
|
||||
case 'tool_response':
|
||||
const responseContent = part.response ? [AnthropicWire_Blocks.TextBlock(part.response)] : [];
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, responseContent, part.isError) };
|
||||
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
|
||||
switch (part.response.type) {
|
||||
case 'function_call':
|
||||
const fcTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)];
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, fcTextParts, part.error ? true : undefined) };
|
||||
break;
|
||||
case 'code_execution':
|
||||
const ceTextParts = [AnthropicWire_Blocks.TextBlock(toolErrorPrefix + part.response.result)];
|
||||
yield { role: 'user', content: AnthropicWire_Blocks.ToolResultBlock(part.id, ceTextParts, part.error ? true : undefined) };
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported part type in Tool message: ${(part as any).pt}`);
|
||||
}
|
||||
@@ -133,9 +165,10 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: Intake_ChatMe
|
||||
}
|
||||
}
|
||||
|
||||
function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
function _toAnthropicTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
return itds.map(itd => {
|
||||
switch (itd.type) {
|
||||
|
||||
case 'function_call':
|
||||
const { name, description, input_schema } = itd.function_call;
|
||||
return {
|
||||
@@ -147,15 +180,15 @@ function _intakeToAnthropicTools(itds: Intake_ToolDefinition[]): NonNullable<TRe
|
||||
required: input_schema?.required,
|
||||
},
|
||||
};
|
||||
case 'gemini_code_interpreter':
|
||||
|
||||
case 'code_execution':
|
||||
throw new Error('Gemini code interpreter is not supported');
|
||||
case 'preprocessor':
|
||||
throw new Error('Preprocessors are not supported yet');
|
||||
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function _intakeToAnthropicToolChoice(itp: Intake_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
|
||||
function _toAnthropicToolChoice(itp: AixTools_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
|
||||
switch (itp.type) {
|
||||
case 'auto':
|
||||
return { type: 'auto' as const };
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
|
||||
import type { Intake_ChatMessage, Intake_DocPart } from '../../../intake/schemas.intake.messages';
|
||||
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
|
||||
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixParts_DocPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
|
||||
import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Messages, GeminiWire_Safety } from '../../wiretypes/gemini.wiretypes';
|
||||
|
||||
|
||||
@@ -9,7 +6,7 @@ import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Me
|
||||
const hotFixImagePartsFirst = true;
|
||||
|
||||
|
||||
export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
|
||||
export function aixToGeminiGenerateContent(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
|
||||
|
||||
// Note: the streaming setting is ignored as it only belongs in the path
|
||||
|
||||
@@ -19,13 +16,13 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate:
|
||||
: undefined;
|
||||
|
||||
// Chat Messages
|
||||
const contents: TRequest['contents'] = _intakeToGeminiContents(chatGenerate.chatSequence);
|
||||
const contents: TRequest['contents'] = _toGeminiContents(chatGenerate.chatSequence);
|
||||
|
||||
// Construct the request payload
|
||||
const payload: TRequest = {
|
||||
contents,
|
||||
tools: chatGenerate.tools && _intakeToGeminiTools(chatGenerate.tools),
|
||||
toolConfig: chatGenerate.toolsPolicy && _intakeToGeminiToolConfig(chatGenerate.toolsPolicy),
|
||||
tools: chatGenerate.tools && _toGeminiTools(chatGenerate.tools),
|
||||
toolConfig: chatGenerate.toolsPolicy && _toGeminiToolConfig(chatGenerate.toolsPolicy),
|
||||
safetySettings: _toGeminiSafetySettings(geminiSafetyThreshold),
|
||||
systemInstruction,
|
||||
generationConfig: {
|
||||
@@ -51,7 +48,7 @@ export function intakeToGeminiGenerateContent(model: Intake_Model, chatGenerate:
|
||||
type TRequest = GeminiWire_API_Generate_Content.Request;
|
||||
|
||||
|
||||
function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire_Messages.Content[] {
|
||||
function _toGeminiContents(chatSequence: AixMessages_ChatMessage[]): GeminiWire_Messages.Content[] {
|
||||
return chatSequence.map(message => {
|
||||
const parts: GeminiWire_ContentParts.ContentPart[] = [];
|
||||
|
||||
@@ -83,12 +80,33 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire
|
||||
break;
|
||||
|
||||
case 'tool_call':
|
||||
parts.push(GeminiWire_ContentParts.FunctionCallPart(part.name, part.args));
|
||||
switch (part.call.type) {
|
||||
case 'function_call':
|
||||
parts.push(GeminiWire_ContentParts.FunctionCallPart(part.call.name, part.call.args ?? undefined));
|
||||
break;
|
||||
case 'code_execution':
|
||||
if (part.call.language?.toLowerCase() !== 'python')
|
||||
console.warn('Gemini only supports Python code execution, but got:', part.call.language);
|
||||
parts.push(GeminiWire_ContentParts.ExecutableCodePart('PYTHON', part.call.code));
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported tool call type in message: ${(part as any).call.type}`);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'tool_response':
|
||||
parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.name, { response: part.response, isError: part.isError }));
|
||||
throw new Error('Tool responses are not supported yet - Gemini Expects Objects, but we have a string...');
|
||||
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
|
||||
switch (part.response.type) {
|
||||
case 'function_call':
|
||||
parts.push(GeminiWire_ContentParts.FunctionResponsePart(part.response._name || part.id, toolErrorPrefix + part.response.result));
|
||||
break;
|
||||
case 'code_execution':
|
||||
parts.push(GeminiWire_ContentParts.CodeExecutionResultPart(!part.error ? 'OUTCOME_OK' : 'OUTCOME_ERROR', toolErrorPrefix + part.response.result));
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported part type in message: ${(part as any).pt}`);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported part type in message: ${(part as any).pt}`);
|
||||
@@ -102,7 +120,7 @@ function _intakeToGeminiContents(chatSequence: Intake_ChatMessage[]): GeminiWire
|
||||
});
|
||||
}
|
||||
|
||||
function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
function _toGeminiTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
const tools: TRequest['tools'] = [];
|
||||
|
||||
itds.forEach(itd => {
|
||||
@@ -116,30 +134,29 @@ function _intakeToGeminiTools(itds: Intake_ToolDefinition[]): NonNullable<TReque
|
||||
functionDeclarations: [{
|
||||
name,
|
||||
description,
|
||||
parameters: {
|
||||
type: 'object',
|
||||
...input_schema,
|
||||
},
|
||||
parameters: { type: 'object', ...input_schema },
|
||||
}],
|
||||
});
|
||||
break;
|
||||
|
||||
case 'gemini_code_interpreter':
|
||||
case 'code_execution':
|
||||
if (itd.variant !== 'gemini_auto_inline')
|
||||
throw new Error('Gemini only supports inline code execution');
|
||||
|
||||
// throw if code execution is present more than once
|
||||
if (tools.some(tool => tool.codeExecution))
|
||||
throw new Error('Gemini code interpreter already defined');
|
||||
|
||||
tools.push({ codeExecution: {} });
|
||||
break;
|
||||
|
||||
case 'preprocessor':
|
||||
throw new Error('Preprocessors are not supported yet');
|
||||
}
|
||||
});
|
||||
|
||||
return tools;
|
||||
}
|
||||
|
||||
function _intakeToGeminiToolConfig(itp: Intake_ToolsPolicy): NonNullable<TRequest['toolConfig']> {
|
||||
function _toGeminiToolConfig(itp: AixTools_ToolsPolicy): NonNullable<TRequest['toolConfig']> {
|
||||
switch (itp.type) {
|
||||
case 'auto':
|
||||
return { functionCallingConfig: { mode: 'AUTO' } };
|
||||
@@ -167,8 +184,8 @@ function _toGeminiSafetySettings(threshold: GeminiWire_Safety.HarmBlockThreshold
|
||||
|
||||
// Approximate conversions - alternative approaches should be tried until we find the best one
|
||||
|
||||
function _toApproximateGeminiDocPart(intakeDocPart: Intake_DocPart): GeminiWire_ContentParts.ContentPart {
|
||||
return GeminiWire_ContentParts.TextPart(`\`\`\`${intakeDocPart.ref || ''}\n${intakeDocPart.data.text}\n\`\`\`\n`);
|
||||
function _toApproximateGeminiDocPart(aixPartsDocPart: AixParts_DocPart): GeminiWire_ContentParts.ContentPart {
|
||||
return GeminiWire_ContentParts.TextPart(`\`\`\`${aixPartsDocPart.ref || ''}\n${aixPartsDocPart.data.text}\n\`\`\`\n`);
|
||||
}
|
||||
|
||||
function _toApproximateGeminiReplyTo(replyTo: string): GeminiWire_ContentParts.ContentPart {
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import { OpenAIDialects } from '~/modules/llms/server/openai/openai.router';
|
||||
import type { OpenAIDialects } from '~/modules/llms/server/openai/openai.router';
|
||||
|
||||
import type { Intake_ChatGenerateRequest, Intake_Model } from '../../../intake/schemas.intake.api';
|
||||
import type { Intake_ChatMessage, Intake_SystemMessage } from '../../../intake/schemas.intake.messages';
|
||||
import type { Intake_ToolDefinition, Intake_ToolsPolicy } from '../../../intake/schemas.intake.tools';
|
||||
|
||||
import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts } from '../../wiretypes/openai.wiretypes';
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixMessages_SystemMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../aix.wiretypes';
|
||||
import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts, OpenAIWire_Messages } from '../../wiretypes/openai.wiretypes';
|
||||
|
||||
|
||||
//
|
||||
@@ -29,7 +26,7 @@ const hotFixSquashTextSeparator = '\n\n\n---\n\n\n';
|
||||
type TRequest = OpenAIWire_API_Chat_Completions.Request;
|
||||
type TRequestMessages = TRequest['messages'];
|
||||
|
||||
export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
|
||||
// Dialect incompatibilities -> Hotfixes
|
||||
const hotFixAlternateUserAssistantRoles = openAIDialect === 'perplexity';
|
||||
@@ -44,7 +41,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
|
||||
throw new Error('This service does not support function calls');
|
||||
|
||||
// Convert the chat messages to the OpenAI 4-Messages format
|
||||
let chatMessages = _intakeToOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence);
|
||||
let chatMessages = _toOpenAIMessages(chatGenerate.systemMessage, chatGenerate.chatSequence);
|
||||
|
||||
// Apply hotfixes
|
||||
if (hotFixSquashMultiPartText)
|
||||
@@ -61,8 +58,8 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
|
||||
let payload: TRequest = {
|
||||
model: model.id,
|
||||
messages: chatMessages,
|
||||
tools: chatGenerate.tools && _intakeToOpenAITools(chatGenerate.tools),
|
||||
tool_choice: chatGenerate.toolsPolicy && _intakeToOpenAIToolChoice(chatGenerate.toolsPolicy),
|
||||
tools: chatGenerate.tools && _toOpenAITools(chatGenerate.tools),
|
||||
tool_choice: chatGenerate.toolsPolicy && _toOpenAIToolChoice(chatGenerate.toolsPolicy),
|
||||
parallel_tool_calls: undefined,
|
||||
max_tokens: model.maxTokens !== undefined ? model.maxTokens : undefined,
|
||||
temperature: model.temperature !== undefined ? model.temperature : undefined,
|
||||
@@ -90,6 +87,7 @@ export function intakeToOpenAIMessageCreate(openAIDialect: OpenAIDialects, model
|
||||
return validated.data;
|
||||
}
|
||||
|
||||
|
||||
function _fixAlternateUserAssistantRoles(chatMessages: TRequestMessages): TRequestMessages {
|
||||
return chatMessages.reduce((acc, historyItem) => {
|
||||
|
||||
@@ -159,7 +157,7 @@ function _fixSquashMultiPartText(chatMessages: TRequestMessages): TRequestMessag
|
||||
}*/
|
||||
|
||||
|
||||
function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined, chatSequence: Intake_ChatMessage[]): TRequestMessages {
|
||||
function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | undefined, chatSequence: AixMessages_ChatMessage[]): TRequestMessages {
|
||||
|
||||
// Transform the chat messages into OpenAI's format (an array of 'system', 'user', 'assistant', and 'tool' messages)
|
||||
const chatMessages: TRequestMessages = [];
|
||||
@@ -258,8 +256,17 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
|
||||
// - otherwise we'll add an assistant message with null message
|
||||
|
||||
// create a new OpenAIWire_ToolCall (specialized to function)
|
||||
const toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.name, JSON.stringify(part.args));
|
||||
|
||||
let toolCallPart;
|
||||
switch (part.call.type) {
|
||||
case 'function_call':
|
||||
toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, part.call.name, part.call.args || '');
|
||||
break;
|
||||
case 'code_execution':
|
||||
toolCallPart = OpenAIWire_ContentParts.PredictedFunctionCall(part.id, 'execute_code' /* suboptimal */, part.call.code || '');
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported tool call type in Model message: ${(part as any).pt}`);
|
||||
}
|
||||
|
||||
// Append to existing content[], or new message
|
||||
if (currentMessage?.role === 'assistant') {
|
||||
@@ -283,7 +290,11 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
|
||||
switch (part.pt) {
|
||||
|
||||
case 'tool_response':
|
||||
chatMessages.push({ role: 'tool', tool_call_id: part.id, content: part.isError ? '[ERROR]' + (part.response || '') : (part.response || '') });
|
||||
const toolErrorPrefix = part.error ? (typeof part.error === 'string' ? `[ERROR] ${part.error} - ` : '[ERROR] ') : '';
|
||||
if (part.response.type === 'function_call' || part.response.type === 'code_execution')
|
||||
chatMessages.push(OpenAIWire_Messages.ToolMessage(part.id, toolErrorPrefix + part.response.result));
|
||||
else
|
||||
throw new Error(`Unsupported tool response type in Tool message: ${(part as any).pt}`);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -297,10 +308,10 @@ function _intakeToOpenAIMessages(systemMessage: Intake_SystemMessage | undefined
|
||||
return chatMessages;
|
||||
}
|
||||
|
||||
|
||||
function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
function _toOpenAITools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
|
||||
return itds.map(itd => {
|
||||
switch (itd.type) {
|
||||
|
||||
case 'function_call':
|
||||
const { name, description, input_schema } = itd.function_call;
|
||||
return {
|
||||
@@ -315,15 +326,15 @@ function _intakeToOpenAITools(itds: Intake_ToolDefinition[]): NonNullable<TReque
|
||||
},
|
||||
},
|
||||
};
|
||||
case 'gemini_code_interpreter':
|
||||
|
||||
case 'code_execution':
|
||||
throw new Error('Gemini code interpreter is not supported');
|
||||
case 'preprocessor':
|
||||
throw new Error('Preprocessors are not supported yet');
|
||||
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function _intakeToOpenAIToolChoice(itp: Intake_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
|
||||
function _toOpenAIToolChoice(itp: AixTools_ToolsPolicy): NonNullable<TRequest['tool_choice']> {
|
||||
// NOTE: OpenAI has an additional policy 'none', which we don't have as it behaves like passing no tools at all.
|
||||
// Passing no tools is mandated instead of 'none'.
|
||||
switch (itp.type) {
|
||||
|
||||
@@ -2,13 +2,13 @@ import { anthropicAccess } from '~/modules/llms/server/anthropic/anthropic.route
|
||||
import { geminiAccess } from '~/modules/llms/server/gemini/gemini.router';
|
||||
import { openAIAccess } from '~/modules/llms/server/openai/openai.router';
|
||||
|
||||
import type { Intake_Access, Intake_ChatGenerateRequest, Intake_Model } from '../../intake/schemas.intake.api';
|
||||
import type { AixAPI_Access, AixAPI_Model, AixAPIChatGenerate_Request } from '../../aix.wiretypes';
|
||||
|
||||
import { GeminiWire_API_Generate_Content } from '../wiretypes/gemini.wiretypes';
|
||||
|
||||
import { intakeToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
|
||||
import { intakeToGeminiGenerateContent } from './adapters/gemini.generateContent';
|
||||
import { intakeToOpenAIMessageCreate } from './adapters/openai.chatCompletions';
|
||||
import { aixToAnthropicMessageCreate } from './adapters/anthropic.messageCreate';
|
||||
import { aixToGeminiGenerateContent } from './adapters/gemini.generateContent';
|
||||
import { aixToOpenAIChatCompletions } from './adapters/openai.chatCompletions';
|
||||
|
||||
import { createAnthropicMessageParser, createAnthropicMessageParserNS } from './parsers/anthropic.parser';
|
||||
import { createGeminiGenerateContentResponseParser } from './parsers/gemini.parser';
|
||||
@@ -18,7 +18,7 @@ import type { ChatGenerateParseFunction } from './chatGenerate.types';
|
||||
import type { StreamDemuxerFormat } from '../stream.demuxers';
|
||||
|
||||
|
||||
export function createChatGenerateDispatch(access: Intake_Access, model: Intake_Model, chatGenerate: Intake_ChatGenerateRequest, streaming: boolean): {
|
||||
export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): {
|
||||
request: { url: string, headers: HeadersInit, body: object },
|
||||
demuxerFormat: StreamDemuxerFormat;
|
||||
chatGenerateParse: ChatGenerateParseFunction;
|
||||
@@ -29,7 +29,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
|
||||
return {
|
||||
request: {
|
||||
...anthropicAccess(access, '/v1/messages'),
|
||||
body: intakeToAnthropicMessageCreate(model, chatGenerate, streaming),
|
||||
body: aixToAnthropicMessageCreate(model, chatGenerate, streaming),
|
||||
},
|
||||
demuxerFormat: streaming ? 'sse' : null,
|
||||
chatGenerateParse: streaming ? createAnthropicMessageParser() : createAnthropicMessageParserNS(),
|
||||
@@ -39,7 +39,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
|
||||
return {
|
||||
request: {
|
||||
...geminiAccess(access, model.id, streaming ? GeminiWire_API_Generate_Content.streamingPostPath : GeminiWire_API_Generate_Content.postPath),
|
||||
body: intakeToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming),
|
||||
body: aixToGeminiGenerateContent(model, chatGenerate, access.minSafetyLevel, false, streaming),
|
||||
},
|
||||
demuxerFormat: streaming ? 'sse' : null,
|
||||
chatGenerateParse: createGeminiGenerateContentResponseParser(model.id),
|
||||
@@ -70,7 +70,7 @@ export function createChatGenerateDispatch(access: Intake_Access, model: Intake_
|
||||
return {
|
||||
request: {
|
||||
...openAIAccess(access, model.id, '/v1/chat/completions'),
|
||||
body: intakeToOpenAIMessageCreate(access.dialect, model, chatGenerate, false, streaming),
|
||||
body: aixToOpenAIChatCompletions(access.dialect, model, chatGenerate, false, streaming),
|
||||
},
|
||||
demuxerFormat: streaming ? 'sse' : null,
|
||||
chatGenerateParse: createOpenAIMessageCreateParser(),
|
||||
|
||||
@@ -64,14 +64,18 @@ export namespace GeminiWire_ContentParts {
|
||||
export const FunctionCallPart_schema = z.object({
|
||||
functionCall: z.object({
|
||||
name: z.string(),
|
||||
args: z.record(z.any()).optional(), // JSON object format
|
||||
// NOTE: replacing with a single string for now, leaving serialization to the caller eventually
|
||||
args: z.string().optional(),
|
||||
// args: z.record(z.any()).optional(), // JSON object format
|
||||
}),
|
||||
});
|
||||
|
||||
const FunctionResponsePart_schema = z.object({
|
||||
functionResponse: z.object({
|
||||
name: z.string(),
|
||||
response: z.record(z.any()), // Optional. JSON object format
|
||||
// NOTE: replacing with a single string for now, leaving serialization to the caller eventually
|
||||
response: z.string().optional(),
|
||||
// response: z.record(z.any()), // Optional. JSON object format
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -123,14 +127,22 @@ export namespace GeminiWire_ContentParts {
|
||||
return { inlineData: { mimeType, data } };
|
||||
}
|
||||
|
||||
export function FunctionCallPart(name: string, args?: Record<string, any>): z.infer<typeof FunctionCallPart_schema> {
|
||||
export function FunctionCallPart(name: string, args?: string): z.infer<typeof FunctionCallPart_schema> {
|
||||
return { functionCall: { name, args } };
|
||||
}
|
||||
|
||||
export function FunctionResponsePart(name: string, response: Record<string, any>): z.infer<typeof FunctionResponsePart_schema> {
|
||||
export function FunctionResponsePart(name: string, response?: string): z.infer<typeof FunctionResponsePart_schema> {
|
||||
return { functionResponse: { name, response } };
|
||||
}
|
||||
|
||||
export function ExecutableCodePart(language: 'PYTHON', code: string): z.infer<typeof unstable_ExecutableCodePart_schema> {
|
||||
return { executableCode: { language, code } };
|
||||
}
|
||||
|
||||
export function CodeExecutionResultPart(outcome: 'OUTCOME_OK' | 'OUTCOME_ERROR', output?: string): z.infer<typeof unstable_CodeExecutionResultPart_schema> {
|
||||
return { codeExecutionResult: { outcome, output } };
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export namespace GeminiWire_Messages {
|
||||
@@ -185,7 +197,14 @@ export namespace GeminiWire_Tools {
|
||||
* https://ai.google.dev/api/rest/v1beta/cachedContents#schema
|
||||
* Here we relax the check.
|
||||
*/
|
||||
parameters: z.record(z.any()).optional(),
|
||||
parameters: z.object({
|
||||
type: z.literal('object'),
|
||||
/**
|
||||
* For stricter validation, use the OpenAPI_Schema.Object_schema
|
||||
*/
|
||||
properties: z.record(z.any()).optional(),
|
||||
required: z.array(z.string()).optional(),
|
||||
}),
|
||||
});
|
||||
|
||||
export const Tool_schema = z.object({
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
import { openAPI_SchemaObject_schema } from '../../intake/schemas.intake.tools';
|
||||
|
||||
|
||||
//
|
||||
// Implementation notes:
|
||||
@@ -60,14 +58,14 @@ export namespace OpenAIWire_ContentParts {
|
||||
}),
|
||||
});
|
||||
|
||||
export const ToolCall_schema = z.discriminatedUnion('type', [
|
||||
PredictedFunctionCall_schema,
|
||||
]);
|
||||
|
||||
export function PredictedFunctionCall(toolCallId: string, functionName: string, functionArgs: string): z.infer<typeof PredictedFunctionCall_schema> {
|
||||
return { type: 'function', id: toolCallId, function: { name: functionName, arguments: functionArgs } };
|
||||
}
|
||||
|
||||
export const ToolCall_schema = z.discriminatedUnion('type', [
|
||||
PredictedFunctionCall_schema,
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
export namespace OpenAIWire_Messages {
|
||||
@@ -107,6 +105,10 @@ export namespace OpenAIWire_Messages {
|
||||
tool_call_id: z.string(),
|
||||
});
|
||||
|
||||
export function ToolMessage(toolCallId: string, content: string): z.infer<typeof ToolMessage_schema> {
|
||||
return { role: 'tool', content, tool_call_id: toolCallId };
|
||||
}
|
||||
|
||||
export const Message_schema = z.discriminatedUnion('role', [
|
||||
SystemMessage_schema,
|
||||
UserMessage_schema,
|
||||
@@ -138,16 +140,12 @@ export namespace OpenAIWire_Tools {
|
||||
*/
|
||||
parameters: z.object({
|
||||
type: z.literal('object'),
|
||||
properties: z.record(openAPI_SchemaObject_schema),
|
||||
// Note: We commented out the code below in favor of the line above, because the OpenAPI 3.0.3 Schema object
|
||||
// is in the 'Intake' API spec (.properties), and here we just need to pass that object upstream.
|
||||
// properties: z.record(z.object({
|
||||
// type: z.enum(['string', 'number', 'integer', 'boolean']),
|
||||
// description: z.string().optional(),
|
||||
// enum: z.array(z.string()).optional(),
|
||||
// })),
|
||||
/**
|
||||
* For stricter validation, use the OpenAPI_Schema.Object_schema
|
||||
*/
|
||||
properties: z.record(z.any()).optional(),
|
||||
required: z.array(z.string()).optional(),
|
||||
}).optional(),
|
||||
}),
|
||||
});
|
||||
|
||||
export const ToolDefinition_schema = z.discriminatedUnion('type', [
|
||||
|
||||
Reference in New Issue
Block a user