mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
AIX: Support for Images on System Messages
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
import { escapeXml } from '~/server/wire';
|
||||
|
||||
import { AixAPIChatGenerate_Request,
|
||||
AixMessages_ChatMessage, AixMessages_SystemMessage, AixMessages_UserMessage, AixParts_DocPart, AixParts_MetaInReferenceToPart } from '../../../api/aix.wiretypes';
|
||||
|
||||
|
||||
/**
 * CGR server-side approximate helper.
 *
 * Scans the system message for the first part whose `pt` is listed in `splitItems`.
 * If one is found, the system message keeps only the parts before it, while that part and
 * everything after it are moved into a synthetic user message that is placed at the head
 * of the chat sequence. The synthetic message carries a `_FLUSH: true` marker.
 *
 * The incoming request is not mutated: new objects/arrays are built for whatever changes.
 *
 * @param chatGenerate the request to pre-process
 * @param splitItems part types (`pt`) that trigger the split; defaults to `['inline_image']`
 * @returns a shallow copy of the request (rewritten if a cut point was found), plus
 *          `systemSplit` which tells whether the split took place
 */
export function aixSpillSystemToUser(chatGenerate: AixAPIChatGenerate_Request, splitItems: AixMessages_SystemMessage['parts'][number]['pt'][] = ['inline_image']): AixAPIChatGenerate_Request & { systemSplit: boolean } {
  const { systemMessage: originalSystem, chatSequence: originalSequence } = chatGenerate;

  // locate the cut point: -1 when there is no system message, no parts, or no matching part
  const cutAt = originalSystem?.parts.length
    ? originalSystem.parts.findIndex(({ pt }) => splitItems.includes(pt))
    : -1;

  // nothing to spill: return the request as-is (shallow copy) with the flag cleared
  if (!originalSystem || cutAt < 0)
    return {
      ...chatGenerate,
      systemMessage: originalSystem,
      chatSequence: originalSequence,
      systemSplit: false,
    };

  // the user message receives the matching part and everything that follows it
  const spilledMessage: AixMessages_UserMessage & { _FLUSH: true } = {
    role: 'user',
    parts: originalSystem.parts.slice(cutAt),
    _FLUSH: true, // make sure we finalize this part; this is a bit of a hack
  };

  return {
    ...chatGenerate,
    // the system message keeps what came before the cut point
    systemMessage: { ...originalSystem, parts: originalSystem.parts.slice(0, cutAt) },
    chatSequence: [spilledMessage, ...originalSequence],
    systemSplit: true,
  };
}
|
||||
|
||||
/**
 * Tells whether a chat message carries a truthy `_FLUSH` marker.
 *
 * @param message the chat message to inspect
 * @returns `true` only when the message has a `_FLUSH` property and its value is truthy
 */
export function aixSpillShallFlush(message: AixMessages_ChatMessage): boolean {
  // messages without the marker never flush
  if (!('_FLUSH' in message))
    return false;
  return Boolean(message._FLUSH);
}
|
||||
|
||||
|
||||
// Approximate conversions - alternative approaches should be tried until we find the best one
|
||||
|
||||
/**
 * Renders a document part as a markdown fenced code block, using `ref` as the info string.
 *
 * The fence is three backticks unless the document text itself contains a run of three or
 * more backticks; in that case the fence is made one backtick longer than the longest run,
 * so that the text cannot close the fence early.
 *
 * @param part the document part; only `ref` and `data.text` are read
 * @returns the fenced block, terminated by a newline
 */
export function approxDocPart_To_String({ ref, data }: AixParts_DocPart /*, wrapFormat?: 'markdown-code'*/): string {
  // NOTE: Consider a better representation here
  //
  // We use the 'legacy' markdown encoding, but we may consider:
  //  - '<doc id='ref' title='title' version='version'>\n...\n</doc>'
  //  - ```doc id='ref' title='title' version='version'\n...\n```
  //  - # Title [id='ref' version='version']\n...\n
  //  - ...more ideas...
  //
  const text = data.text;

  // size the fence so it is strictly longer than any backtick run inside the text
  let longestRun = 0;
  for (const run of text.match(/`+/g) ?? [])
    longestRun = Math.max(longestRun, run.length);
  const fence = '`'.repeat(Math.max(3, longestRun + 1));

  return fence + (ref || '') + '\n' + text + '\n' + fence + '\n';
}
|
||||
|
||||
/**
 * Renders an "in reference to" meta part as a small XML-like `<context>` string.
 *
 * Each referenced text is passed through `escapeXml` and wrapped in a `<ref>` element.
 *
 * @param irt the meta part; only `referTo[].mText` is read
 * @returns the `<context>` string, or `null` when there are no references
 */
export function approxInReferenceTo_To_XMLString(irt: AixParts_MetaInReferenceToPart): string | null {
  const escapedRefs: string[] = [];
  for (const item of irt.referTo)
    escapedRefs.push(escapeXml(item.mText));

  switch (escapedRefs.length) {
    case 0:
      // alternative: `<context>User provides no specific references</context>`
      return null;
    case 1:
      return `<context>User refers to this in particular:<ref>${escapedRefs[0]}</ref></context>`;
    default:
      return `<context>User refers to ${escapedRefs.length} items:<ref>${escapedRefs.join('</ref><ref>')}</ref></context>`;
  }
}
|
||||
@@ -1,8 +1,8 @@
|
||||
import { escapeXml } from '~/server/wire';
|
||||
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixParts_DocPart, AixParts_MetaInReferenceToPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../api/aix.wiretypes';
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../api/aix.wiretypes';
|
||||
import { AnthropicWire_API_Message_Create, AnthropicWire_Blocks } from '../../wiretypes/anthropic.wiretypes';
|
||||
|
||||
import { aixSpillShallFlush, aixSpillSystemToUser, approxDocPart_To_String, approxInReferenceTo_To_XMLString } from './adapters.common';
|
||||
|
||||
|
||||
// configuration
|
||||
const hotFixImagePartsFirst = true;
|
||||
@@ -14,7 +14,10 @@ const hotFixMapModelImagesToUser = true;
|
||||
|
||||
type TRequest = AnthropicWire_API_Message_Create.Request;
|
||||
|
||||
export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
|
||||
export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
|
||||
|
||||
// Pre-process CGR - approximate spill of System to User message
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// Convert the system message
|
||||
let systemMessage: TRequest['system'] = undefined;
|
||||
@@ -30,6 +33,10 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: A
|
||||
acc.push(AnthropicWire_Blocks.TextBlock(approxDocPart_To_String(part)));
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
// we have already removed image parts from the system message
|
||||
throw new Error('Anthropic: images have to be in user messages, not in system message');
|
||||
|
||||
case 'meta_cache_control':
|
||||
if (!acc.length)
|
||||
console.warn('Anthropic: cache_control without a message to attach to');
|
||||
@@ -40,6 +47,7 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: A
|
||||
break;
|
||||
|
||||
default:
|
||||
const _exhaustiveCheck: never = part;
|
||||
throw new Error(`Unsupported part type in System message: ${(part as any).pt}`);
|
||||
}
|
||||
return acc;
|
||||
@@ -76,6 +84,12 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, chatGenerate: A
|
||||
}
|
||||
currentMessage.content.push(content);
|
||||
}
|
||||
|
||||
// Flush: interrupt batching within the same-role and finalize the current message
|
||||
if (aixSpillShallFlush(aixMessage) && currentMessage) {
|
||||
chatMessages.push(currentMessage);
|
||||
currentMessage = null;
|
||||
}
|
||||
}
|
||||
if (currentMessage)
|
||||
chatMessages.push(currentMessage);
|
||||
@@ -305,27 +319,3 @@ function _toAnthropicToolChoice(itp: AixTools_ToolsPolicy): NonNullable<TRequest
|
||||
return { type: 'tool' as const, name: itp.function_call.name };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Approximate conversions - alternative approaches should be tried until we find the best one
|
||||
|
||||
/**
 * Renders a document part as a markdown fenced code block, using `ref` as the info string.
 *
 * The fence is three backticks unless the document text itself contains a run of three or
 * more backticks; in that case the fence is made one backtick longer than the longest run,
 * so that the text cannot close the fence early.
 *
 * @param part the document part; only `ref` and `data.text` are read
 * @returns the fenced block, terminated by a newline
 */
export function approxDocPart_To_String({ ref, data }: AixParts_DocPart /*, wrapFormat?: 'markdown-code'*/): string {
  // NOTE: Consider a better representation here
  //
  // We use the 'legacy' markdown encoding, but we may consider:
  //  - '<doc id='ref' title='title' version='version'>\n...\n</doc>'
  //  - ```doc id='ref' title='title' version='version'\n...\n```
  //  - # Title [id='ref' version='version']\n...\n
  //  - ...more ideas...
  //
  const text = data.text;

  // size the fence so it is strictly longer than any backtick run inside the text
  let longestRun = 0;
  for (const run of text.match(/`+/g) ?? [])
    longestRun = Math.max(longestRun, run.length);
  const fence = '`'.repeat(Math.max(3, longestRun + 1));

  return fence + (ref || '') + '\n' + text + '\n' + fence + '\n';
}
|
||||
|
||||
/**
 * Renders an "in reference to" meta part as a small XML-like `<context>` string.
 *
 * Each referenced text is passed through `escapeXml` and wrapped in a `<ref>` element.
 *
 * @param irt the meta part; only `referTo[].mText` is read
 * @returns the `<context>` string, or `null` when there are no references
 */
export function approxInReferenceTo_To_XMLString(irt: AixParts_MetaInReferenceToPart): string | null {
  const escapedRefs: string[] = [];
  for (const item of irt.referTo)
    escapedRefs.push(escapeXml(item.mText));

  switch (escapedRefs.length) {
    case 0:
      // alternative: `<context>User provides no specific references</context>`
      return null;
    case 1:
      return `<context>User refers to this in particular:<ref>${escapedRefs[0]}</ref></context>`;
    default:
      return `<context>User refers to ${escapedRefs.length} items:<ref>${escapedRefs.join('</ref><ref>')}</ref></context>`;
  }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixParts_DocPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../api/aix.wiretypes';
|
||||
import { GeminiWire_API_Generate_Content, GeminiWire_ContentParts, GeminiWire_Messages, GeminiWire_Safety, GeminiWire_ToolDeclarations } from '../../wiretypes/gemini.wiretypes';
|
||||
|
||||
import { approxDocPart_To_String, approxInReferenceTo_To_XMLString } from './anthropic.messageCreate';
|
||||
import { aixSpillSystemToUser, approxDocPart_To_String, approxInReferenceTo_To_XMLString } from './adapters.common';
|
||||
|
||||
|
||||
// configuration
|
||||
@@ -9,10 +9,13 @@ const hotFixImagePartsFirst = true; // https://ai.google.dev/gemini-api/docs/ima
|
||||
const hotFixReplaceEmptyMessagesWithEmptyTextPart = true;
|
||||
|
||||
|
||||
export function aixToGeminiGenerateContent(model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
|
||||
export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, geminiSafetyThreshold: GeminiWire_Safety.HarmBlockThreshold, jsonOutput: boolean, _streaming: boolean): TRequest {
|
||||
|
||||
// Note: the streaming setting is ignored as it only belongs in the path
|
||||
|
||||
// Pre-process CGR - approximate spill of System to User message - note: no need to flush, as messages are never batched here
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// System Instructions
|
||||
let systemInstruction: TRequest['systemInstruction'] = undefined;
|
||||
if (chatGenerate.systemMessage?.parts.length) {
|
||||
@@ -27,6 +30,10 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, chatGenerate: Ai
|
||||
acc.parts.push(GeminiWire_ContentParts.TextPart(approxDocPart_To_String(part)));
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
// we have already removed image parts from the system message
|
||||
throw new Error('Gemini: images have to be in user messages, not in system message');
|
||||
|
||||
case 'meta_cache_control':
|
||||
// ignore this breakpoint hint - Anthropic only
|
||||
break;
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { OpenAIDialects } from '~/modules/llms/server/openai/openai.router'
|
||||
import { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixMessages_SystemMessage, AixParts_DocPart, AixParts_InlineAudioPart, AixParts_MetaInReferenceToPart, AixTools_ToolDefinition, AixTools_ToolsPolicy } from '../../../api/aix.wiretypes';
|
||||
import { OpenAIWire_API_Chat_Completions, OpenAIWire_ContentParts, OpenAIWire_Messages } from '../../wiretypes/openai.wiretypes';
|
||||
|
||||
import { approxDocPart_To_String } from './anthropic.messageCreate';
|
||||
import { aixSpillShallFlush, aixSpillSystemToUser, approxDocPart_To_String } from './adapters.common';
|
||||
|
||||
|
||||
//
|
||||
@@ -29,7 +29,10 @@ const approxSystemMessageJoiner = '\n\n---\n\n';
|
||||
type TRequest = OpenAIWire_API_Chat_Completions.Request;
|
||||
type TRequestMessages = TRequest['messages'];
|
||||
|
||||
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
|
||||
// Pre-process CGR - approximate spill of System to User message
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// Dialect incompatibilities -> Hotfixes
|
||||
const hotFixAlternateUserAssistantRoles = openAIDialect === 'deepseek' || openAIDialect === 'perplexity';
|
||||
@@ -362,6 +365,10 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
msg0TextParts.push(aixDocPart_to_OpenAITextContent(part));
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
// we have already removed image parts from the system message
|
||||
throw new Error('OpenAI ChatCompletions: images have to be in user messages, not in system message');
|
||||
|
||||
case 'meta_cache_control':
|
||||
// ignore this breakpoint hint - Anthropic only
|
||||
break;
|
||||
@@ -386,7 +393,9 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
|
||||
|
||||
// Convert the messages
|
||||
for (const { parts, role } of chatSequence) {
|
||||
let allowAppend = true;
|
||||
for (const aixMessage of chatSequence) {
|
||||
const { parts, role } = aixMessage;
|
||||
switch (role) {
|
||||
|
||||
case 'user':
|
||||
@@ -398,20 +407,22 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
const textContentPart = OpenAIWire_ContentParts.TextContentPart(part.text);
|
||||
|
||||
// Append to existing content[], or new message
|
||||
if (currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
if (allowAppend && currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
currentMessage.content.push(textContentPart);
|
||||
else
|
||||
chatMessages.push({ role: 'user', content: hotFixPreferArrayUserContent ? [textContentPart] : textContentPart.text });
|
||||
allowAppend = true;
|
||||
break;
|
||||
|
||||
case 'doc':
|
||||
const docContentPart = aixDocPart_to_OpenAITextContent(part);
|
||||
|
||||
// Append to existing content[], or new message
|
||||
if (currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
if (allowAppend && currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
currentMessage.content.push(docContentPart);
|
||||
else
|
||||
chatMessages.push({ role: 'user', content: hotFixPreferArrayUserContent ? [docContentPart] : docContentPart.text });
|
||||
allowAppend = true;
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
@@ -421,10 +432,11 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
const imageContentPart = OpenAIWire_ContentParts.ImageContentPart(base64DataUrl, hotFixForceImageContentPartOpenAIDetail);
|
||||
|
||||
// Append to existing content[], or new message
|
||||
if (currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
if (allowAppend && currentMessage?.role === 'user' && Array.isArray(currentMessage.content))
|
||||
currentMessage.content.push(imageContentPart);
|
||||
else
|
||||
chatMessages.push({ role: 'user', content: [imageContentPart] });
|
||||
allowAppend = true;
|
||||
break;
|
||||
|
||||
case 'meta_cache_control':
|
||||
@@ -443,6 +455,9 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
|
||||
throw new Error(`Unsupported part type in User message: ${(part as any).pt}`);
|
||||
}
|
||||
}
|
||||
|
||||
// If this message shall be flushed, prevent the next user part from being appended to it (appending is re-enabled right after)
|
||||
allowAppend = !aixSpillShallFlush(aixMessage);
|
||||
break;
|
||||
|
||||
case 'model':
|
||||
|
||||
@@ -4,7 +4,7 @@ import { AixAPI_Model, AixAPIChatGenerate_Request, AixMessages_ChatMessage, AixM
|
||||
import { OpenAIWire_API_Responses, OpenAIWire_Responses_Items, OpenAIWire_Responses_Tools } from '../../wiretypes/openai.wiretypes';
|
||||
|
||||
import { aixDocPart_to_OpenAITextContent, aixMetaRef_to_OpenAIText, aixTexts_to_OpenAIInstructionText } from './openai.chatCompletions';
|
||||
import { approxDocPart_To_String } from './anthropic.messageCreate';
|
||||
import { aixSpillShallFlush, aixSpillSystemToUser, approxDocPart_To_String } from './adapters.common';
|
||||
|
||||
|
||||
// configuration
|
||||
@@ -23,7 +23,10 @@ type TRequestTool = OpenAIWire_Responses_Tools.Tool;
|
||||
* - much side functionality is not implemented yet
|
||||
* - testing with o3-pro only for now
|
||||
*/
|
||||
export function aixToOpenAIResponses(openAIDialect: OpenAIDialects, model: AixAPI_Model, chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
export function aixToOpenAIResponses(openAIDialect: OpenAIDialects, model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
|
||||
|
||||
// Pre-process CGR - approximate spill of System to User message
|
||||
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
|
||||
|
||||
// [OpenAI] Vendor-specific model checks
|
||||
const isOpenAIOFamily = ['gpt-6', 'gpt-5', 'o4', 'o3', 'o1'].some(_id => model.id === _id || model.id.startsWith(_id + '-'));
|
||||
@@ -168,6 +171,10 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
|
||||
instructionsParts.push(aixDocPart_to_OpenAITextContent(part).text);
|
||||
break;
|
||||
|
||||
case 'inline_image':
|
||||
// we have already removed image parts from the system message
|
||||
throw new Error('OpenAI Responses: images have to be in user messages, not in system message');
|
||||
|
||||
case 'meta_cache_control':
|
||||
// ignore this breakpoint hint - Anthropic only
|
||||
break;
|
||||
@@ -187,10 +194,11 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
|
||||
type FunctionCallMessage = OpenAIWire_Responses_Items.OutputFunctionCallItem;
|
||||
type FunctionCallOutputMessage = OpenAIWire_Responses_Items.FunctionToolCallOutput;
|
||||
|
||||
let allowUserAppend = true;
|
||||
function userMessage() {
|
||||
// Ensure the last message is a user message, or create a new one
|
||||
let lastMessage = chatMessages.length ? chatMessages[chatMessages.length - 1] : undefined;
|
||||
if (lastMessage && lastMessage.type === 'message' && lastMessage.role === 'user')
|
||||
if (allowUserAppend && lastMessage && lastMessage.type === 'message' && lastMessage.role === 'user')
|
||||
return lastMessage;
|
||||
const newMessage: UserMessage = {
|
||||
type: 'message',
|
||||
@@ -198,6 +206,7 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
|
||||
content: [],
|
||||
};
|
||||
chatMessages.push(newMessage);
|
||||
allowUserAppend = true;
|
||||
return newMessage;
|
||||
}
|
||||
|
||||
@@ -245,7 +254,8 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
|
||||
* - assistant messages to the old Input Message format (which doesn't need IDs)
|
||||
*
|
||||
*/
|
||||
for (const { role: messageRole, parts: messageParts } of chatSequence) {
|
||||
for (const aixMessage of chatSequence) {
|
||||
const { role: messageRole, parts: messageParts } = aixMessage;
|
||||
|
||||
switch (messageRole) {
|
||||
case 'user':
|
||||
@@ -295,6 +305,9 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
|
||||
throw new Error(`Unsupported part type in User message: ${uPt}`);
|
||||
}
|
||||
}
|
||||
|
||||
// If this message shall be flushed, prevent the next user content from being appended to it (appending is re-enabled right after)
|
||||
allowUserAppend = !aixSpillShallFlush(aixMessage);
|
||||
break;
|
||||
|
||||
case 'model':
|
||||
|
||||
Reference in New Issue
Block a user