mirror of https://github.com/enricoros/big-AGI.git
synced 2026-05-11 06:00:15 -07:00
794 lines · 36 KiB · TypeScript
import type { Immutable } from '~/common/types/immutable.types';
|
||
import { getImageAsset } from '~/common/stores/blob/dblobs-portability';
|
||
|
||
import { DLLM, LLM_IF_HOTFIX_NoStream, LLM_IF_HOTFIX_NoWebP, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_StripSys0, LLM_IF_HOTFIX_Sys0ToUsr0 } from '~/common/stores/llms/llms.types';
|
||
import { DMessage, DMessageRole, DMetaReferenceItem, MESSAGE_FLAG_AIX_SKIP, MESSAGE_FLAG_VND_ANT_CACHE_AUTO, MESSAGE_FLAG_VND_ANT_CACHE_USER, messageHasUserFlag } from '~/common/stores/chat/chat.message';
|
||
import { DMessageFragment, DMessageImageRefPart, DMessageZyncAssetReferencePart, isContentOrAttachmentFragment, isToolResponseFunctionCallPart, isVoidThinkingFragment } from '~/common/stores/chat/chat.fragments';
|
||
import { Is } from '~/common/util/pwaUtils';
|
||
import { convert_Base64WithMimeType_To_Blob, convert_Blob_To_Base64 } from '~/common/util/blobUtils';
|
||
import { imageBlobConvertType, imageBlobResizeIfNeeded, LLMImageResizeMode } from '~/common/util/imageUtils';
|
||
|
||
// NOTE: pay particular attention to the "import type", as this is importing from the server-side Zod definitions
|
||
import type { AixAPIChatGenerate_Request, AixMessages_ModelMessage, AixMessages_UserMessage, AixParts_InlineImagePart, AixParts_MetaCacheControl, AixParts_MetaInReferenceToPart, AixParts_ModelAuxPart } from '../server/api/aix.wiretypes';
|
||
|
||
// TODO: remove console messages to zero, or replace with throws or something
|
||
|
||
|
||
// configuration
// Safari cannot encode WebP via canvas, so fall back to JPEG there; other browsers use WebP (smaller)
const MODEL_IMAGE_RESCALE_MIMETYPE = !Is.Browser.Safari ? 'image/webp' : 'image/jpeg';
// encoder quality (0..1) applied when images are rescaled before transmission
const MODEL_IMAGE_RESCALE_QUALITY = 0.90;
const IGNORE_CGR_NO_IMAGE_DEREFERENCE = true; // set to false to raise an exception, otherwise the CGR will continue skipping the part
const AUTO_SYSTEM_IMAGES_INDEX = true; // set to false to disable the small index of images (in system instruction)
|
||
|
||
|
||
// AIX <> Simple Text API helpers
|
||
|
||
/**
 * The simplest text-only inputs for aixChatGenerateContent_DMessage.
 * Each entry carries only the speaking role ('user' | 'model') and the plain text of that turn.
 */
export type AixChatGenerate_TextMessages = {
  role: 'user' | 'model';
  text: string;
}[];
|
||
|
||
export function aixCGR_FromSimpleText(systemInstruction: null | string, messages: AixChatGenerate_TextMessages): AixAPIChatGenerate_Request {
|
||
return {
|
||
systemMessage: systemInstruction === null ? null : aixCGR_SystemMessageText(systemInstruction),
|
||
chatSequence: messages.map(m => {
|
||
switch (m.role) {
|
||
case 'user':
|
||
return aixCGR_UserMessageText(m.text);
|
||
case 'model':
|
||
return aixCGR_ModelMessageText(m.text);
|
||
}
|
||
}),
|
||
};
|
||
}
|
||
|
||
export function aixCGR_SystemMessageText(text: string) {
|
||
return { parts: [aixCGRTextPart(text)] };
|
||
}
|
||
|
||
export function aixCGR_UserMessageText(text: string): AixMessages_UserMessage {
|
||
return { role: 'user', parts: [aixCGRTextPart(text)] };
|
||
}
|
||
|
||
function aixCGR_ModelMessageText(text: string): AixMessages_ModelMessage {
|
||
return { role: 'model', parts: [aixCGRTextPart(text)] };
|
||
}
|
||
|
||
function aixCGRTextPart(text: string) {
|
||
return { pt: 'text' as const, text };
|
||
}
|
||
|
||
|
||
//
|
||
// AIX <> Chat Messages API helpers
|
||
//
|
||
|
||
|
||
/**
 * Converts a DMessage-shaped system instruction into the AIX wire `systemMessage`.
 *
 * Accepted fragment content: content/text parts and attachment doc parts are copied as-is;
 * attachment Zync image-asset references are dereferenced into inline images (audio throws).
 * When AUTO_SYSTEM_IMAGES_INDEX is on, a small text index describing the inlined images is
 * inserted before the first inline image. Anthropic cache-control flags on the message append
 * a meta_cache_control part (only if the system message is non-empty).
 *
 * NOTE: the `default:` labels placed BEFORE case labels are deliberate — the `never` assignment
 * gives compile-time exhaustiveness, then control falls through into the warn-and-skip cases.
 *
 * @param systemInstruction null for no system message, or the fragments/metadata/userFlags of a DMessage
 * @returns the AIX systemMessage (possibly with empty parts), or null when no instruction was given
 * @throws when a Zync audio asset is present, or (if IGNORE_CGR_NO_IMAGE_DEREFERENCE is false) when an image cannot be dereferenced
 */
export async function aixCGR_SystemMessage_FromDMessageOrThrow(
  systemInstruction: null | Pick<DMessage, 'fragments' | 'metadata' | 'userFlags'>,
): Promise<AixAPIChatGenerate_Request['systemMessage']> {

  // quick bypass for no message
  if (!systemInstruction)
    return null;

  // create the system instruction
  const sm: AixAPIChatGenerate_Request['systemMessage'] = {
    parts: [],
  };

  // collect image description texts during conversion
  const imageDescriptionTexts: string[] = [];

  // process fragments of the system instruction
  for (const sFragment of systemInstruction.fragments) {
    switch (sFragment.ft) {

      // Content Fragments - system has [ Text: the good old system instruction ]
      case 'content':
        switch (sFragment.part.pt) {
          // text parts are copied as-is
          case 'text':
            sm.parts.push(sFragment.part);
            break;

          default:
            const _exhaustiveCheck: never = sFragment.part;
            // noinspection FallThroughInSwitchStatementJS
          case 'reference':
          case 'image_ref':
          case 'tool_invocation':
          case 'tool_response':
          case 'hosted_resource':
          case 'error':
          case '_pt_sentinel':
            console.warn('[DEV] aixCGR_systemMessageFromInstruction: unexpected System Content fragment', { sFragment });
            break;
        }
        break;

      // Attachment Fragments - system has [ Doc: document attachments, such as files, etf., Reference: to Zync parts, including Image which is the only one supported, ... ]
      case 'attachment':
        switch (sFragment.part.pt) {
          // doc parts are copied as-is
          case 'doc':
            sm.parts.push(sFragment.part);
            break;

          // reference: image parts are supported
          case 'reference':
            const refPart = sFragment.part;
            const refPartRt = refPart.rt;
            switch (refPartRt) {
              case 'zync':
                const zt = refPart.zType;
                switch (zt) {
                  case 'asset':
                    const at = refPart.assetType;
                    switch (at) {
                      case 'audio':
                        // dereference the Zync Audio Asset, converting it to an inline buffer
                        throw '[DEV] audio assets from the user are not supported yet';

                      case 'image':
                        // dereference the Zync Image Asset, converting it to an inline image
                        const resizeMode = false; // keep the image as-is, do not diminish quality; as any resize was done at the Persona edit time
                        try {
                          sm.parts.push(await aixConvertZyncImageAssetRefToInlineImageOrThrow(refPart, resizeMode));

                          // NOTE: we SHALL make this more generic, but it's okay for the time being
                          if (AUTO_SYSTEM_IMAGES_INDEX) {
                            // Generate description text using pure function
                            const title = sFragment?.ft === 'attachment' ? sFragment.title : undefined;
                            // const caption = sFragment?.ft === 'attachment' ? sFragment.caption : undefined;
                            const altText = refPart.zRefSummary?.text || refPart._legacyImageRefPart?.altText;
                            let width = refPart._legacyImageRefPart?.width;
                            let height = refPart._legacyImageRefPart?.height;
                            let prompt: string | undefined;
                            let author: string | undefined;

                            // Try to get additional metadata from the image asset
                            // (best-effort: the legacy dblob may have richer width/height/origin info)
                            try {
                              if (refPart._legacyImageRefPart) {
                                const dataRef = refPart._legacyImageRefPart.dataRef;
                                if (dataRef.reftype === 'dblob' && 'dblobAssetId' in dataRef) {
                                  const imageAsset = await getImageAsset(dataRef.dblobAssetId);
                                  if (imageAsset) {
                                    width = imageAsset.metadata.width;
                                    height = imageAsset.metadata.height;
                                    author = imageAsset.metadata.author;
                                    // Extract info from origin (generated images override author with the generator name)
                                    if (imageAsset.origin.ot === 'generated') {
                                      prompt = imageAsset.origin.prompt;
                                      author = imageAsset.origin.generatorName;
                                    }
                                  }
                                }
                              }
                            } catch {
                              // Continue without additional metadata if asset fetch fails
                            }

                            // Build description text inline
                            const parts: string[] = [];
                            parts.push(title || 'Image');
                            if (width && height) parts.push(`(${width}×${height})`);
                            if (altText && altText !== title) parts.push(`- ${altText}`);
                            if (prompt) {
                              parts.push(`- Generated from: "${prompt}"`);
                              if (author) parts.push(`by ${author}`);
                            } else if (author) parts.push(`- Author: ${author}`);
                            // if (caption && caption !== altText) parts.push(`- ${caption}`);
                            const descriptionText = parts.join(' ');
                            imageDescriptionTexts.push(descriptionText);
                          }

                        } catch (error: any) {
                          if (IGNORE_CGR_NO_IMAGE_DEREFERENCE)
                            console.warn(`Zync asset reference from the system instruction missing in the chat generation request because: ${error?.message || error?.toString() || 'Unknown error'} - continuing without`);
                          else throw error;
                        }
                        break;

                      default:
                        const _exhaustiveCheck: never = at;
                        console.warn('[DEV] aixCGR_systemMessageFromInstruction: unexpected System Reference fragment Asset type', at);
                        break;
                    }
                    break;

                  default:
                    const _exhaustiveCheck: never = zt;
                    break;
                }
                break;

              default:
                const _exhaustiveCheck: never = refPartRt;
                // noinspection FallThroughInSwitchStatementJS
              case '_sentinel':
                console.warn('[DEV] aixCGR_systemMessageFromInstruction: unexpected System Reference fragment', { sFragment });
                break;
            }
            break;

          default:
            const _exhaustiveCheck: never = sFragment.part;
            // noinspection FallThroughInSwitchStatementJS
          case 'image_ref':
          case '_pt_sentinel':
            console.warn('[DEV] aixCGR_systemMessageFromInstruction: unexpected System Attachment fragment', { sFragment });
            break;
        }
        break;

      default:
        const _exhaustiveCheck: never = sFragment;
        // noinspection FallThroughInSwitchStatementJS
      case 'void':
      case '_ft_sentinel':
        console.warn('[DEV] aixCGR_systemMessageFromInstruction: unexpected System Fragment type', { sFragment });
        break;
    }
  }

  // Add rich image descriptions if there are images that will be spilled over
  if (AUTO_SYSTEM_IMAGES_INDEX && imageDescriptionTexts.length > 0) {
    const firstImageIndex = sm.parts.findIndex(part => part.pt === 'inline_image');
    if (firstImageIndex >= 0) {
      const enHeading = imageDescriptionTexts.length === 1
        ? 'Note: There is 1 image attached to this system instruction that will appear in the following user message:'
        : `Note: There are ${imageDescriptionTexts.length} images attached to this system instruction that will appear in the following user message:`;
      const indexText = [enHeading, ...imageDescriptionTexts].join('\n - ');

      // Insert the descriptive text before the first image
      sm.parts.splice(firstImageIndex, 0, { pt: 'text', text: indexText });
    }
  }

  // (on System message) handle the ant-cache-prompt user/auto flags
  const mHasAntCacheFlag = messageHasUserFlag(systemInstruction, MESSAGE_FLAG_VND_ANT_CACHE_AUTO) || messageHasUserFlag(systemInstruction, MESSAGE_FLAG_VND_ANT_CACHE_USER);
  if (mHasAntCacheFlag
    && sm.parts.length > 0 // added this to avoid settings a cache control on an empty system message
  )
    sm.parts.push(_clientCreateAixMetaCacheControlPart('anthropic-ephemeral'));

  return sm;
}
|
||
|
||
|
||
/**
 * Converts a sequence of user/assistant DMessages (system already handled separately) into the
 * AIX wire `chatSequence`.
 *
 * Per message: AIX-skip-flagged messages are filtered out first; user fragments map to
 * text/doc/inline-image parts (+ optional meta_in_reference_to and anthropic cache-control parts);
 * assistant fragments map to text/tool/aux/error/inline-image/tool_response parts, with images
 * downscaled unless the message is the LAST assistant message (then full/high-res by size).
 * Empty assistant messages are dropped. Any other role throws in development, warns in production.
 *
 * NOTE: the `default:` + `never` assignments inside the nested switches are compile-time
 * exhaustiveness checks; some deliberately fall through into warn-and-skip case labels.
 *
 * @param messageSequenceWithoutSystem history WITHOUT the system message
 * @returns the AIX chatSequence array (never includes a system message)
 * @throws on Zync audio assets, invalid tool_response payloads, non-user/assistant roles (dev),
 *         or image dereference failures when IGNORE_CGR_NO_IMAGE_DEREFERENCE is false
 */
export async function aixCGR_ChatSequence_FromDMessagesOrThrow(
  messageSequenceWithoutSystem: Readonly<Pick<DMessage, 'role' | 'fragments' | 'metadata' | 'userFlags'>[]>, // Note: adding the "Pick" to show the low requirement from the DMessage type, as we'll move to simpler APIs soon
  // _assemblyMode: 'complete' = 'complete',
): Promise<AixAPIChatGenerate_Request['chatSequence']> {

  // if the user has marked messages for exclusion, we skip them
  messageSequenceWithoutSystem = messageSequenceWithoutSystem.filter(m => !messageHasUserFlag(m, MESSAGE_FLAG_AIX_SKIP));

  // index (post-filter) of the last assistant message: only its images keep full/high-res quality
  const lastAssistantMessageIndex = messageSequenceWithoutSystem.findLastIndex(m => m.role === 'assistant');

  // reduce history
  // NOTE: we used to have a "systemMessage" here, but we're moving to a more strict API with separate processing of it;
  // - as such we now 'throw' if a system message is found (on dev mode, and just warn in production).
  // - still, we keep the full reducer as a 'AixCGR_FromDmessages' type, in case we need more complex reductions in the future
  const cgr = await messageSequenceWithoutSystem.reduce(async (accPromise, m, _index): Promise<AixAPIChatGenerate_Request> => {
    const acc = await accPromise;

    // (on any User/Assistant messages) check the ant-cache-prompt user/auto flags
    const mHasAntCacheFlag = messageHasUserFlag(m, MESSAGE_FLAG_VND_ANT_CACHE_AUTO) || messageHasUserFlag(m, MESSAGE_FLAG_VND_ANT_CACHE_USER);

    // in the new version we handle all parts and only expect User and Assistant DMessages - as the System has been handled separately
    const dMessageRole: DMessageRole = m.role;
    if (dMessageRole === 'user') {

      const dMessageUserFragments = m.fragments;
      const aixChatMessageUser = await dMessageUserFragments.reduce(async (uMsgPromise, uFragment: DMessageFragment) => {

        const uMsg = await uMsgPromise;
        if (!isContentOrAttachmentFragment(uFragment) || uFragment.part.pt === '_pt_sentinel')
          return uMsg;

        switch (uFragment.part.pt) {
          case 'text':
            uMsg.parts.push(uFragment.part);
            break;

          case 'reference':
            const refPart = uFragment.part;
            const refPartRt = refPart.rt;
            switch (refPartRt) {

              case 'zync':
                const zt = refPart.zType;
                switch (zt) {

                  case 'asset':
                    const at = refPart.assetType;
                    switch (at) {

                      case 'image':
                        // dereference the Zync Image Asset, converting it to an inline image
                        try {
                          uMsg.parts.push(await aixConvertZyncImageAssetRefToInlineImageOrThrow(refPart, false));
                        } catch (error: any) {
                          if (IGNORE_CGR_NO_IMAGE_DEREFERENCE) console.warn(`Zync asset reference from the user missing in the chat generation request because: ${error?.message || error?.toString() || 'Unknown error'} - continuing without`);
                          else throw error;
                        }
                        break;

                      case 'audio':
                        // dereference the Zync Audio Asset, converting it to an inline buffer
                        throw '[DEV] audio assets from the user are not supported yet';

                      default:
                        const _exhaustiveCheck: never = at;
                        console.warn('aixCGR_FromDMessages: unexpected Zync asset type from the user', at);
                        break;
                    }
                    break;

                  default:
                    const _exhaustiveCheck: never = zt;
                    break;
                }
                break;

              case '_sentinel':
                break; // not a real case

              default:
                const _exhaustiveCheck: never = refPartRt;
                console.warn('aixCGR_FromDMessages: unexpected User fragment part type', refPartRt);
            }
            break;

          case 'image_ref':
            // note, we don't resize, as the user image is resized following the user's preferences
            try {
              uMsg.parts.push(await aixConvertImageRefToInlineImageOrThrow(uFragment.part, false));
            } catch (error: any) {
              if (IGNORE_CGR_NO_IMAGE_DEREFERENCE) console.warn(`Image from the user missing in the chat generation request because: ${error?.message || error?.toString() || 'Unknown error'} - continuing without`);
              else throw error;
            }
            break;

          case 'doc':
            uMsg.parts.push(uFragment.part);
            break;

          // skipped (non-user)
          case 'error':
          case 'tool_invocation':
          case 'tool_response':
          case 'hosted_resource':
            console.warn('aixCGR_FromDMessages: unexpected Non-User fragment part type', (uFragment.part as any).pt);
            break;

          default:
            const _exhaustiveCheck: never = uFragment.part;
            console.warn('aixCGR_FromDMessages: unexpected User fragment part type', (uFragment.part as any).pt);
        }
        return uMsg;
      }, Promise.resolve({ role: 'user', parts: [] } as AixMessages_UserMessage));

      // handle in-reference-to metadata, adding a part right after the user text (or at the beginning)
      if (m.metadata?.inReferenceTo?.length) {
        // find the index of the last text part
        const lastTextPartIndex = aixChatMessageUser.parts.findLastIndex(p => p.pt === 'text');
        // insert the meta part after the last text part (and before the first attachment)
        aixChatMessageUser.parts.splice(lastTextPartIndex + 1, 0, _clientCreateAixMetaInReferenceToPart(m.metadata.inReferenceTo));
      }

      // (on User messages) handle the ant-cache-prompt user/auto flags
      if (mHasAntCacheFlag)
        aixChatMessageUser.parts.push(_clientCreateAixMetaCacheControlPart('anthropic-ephemeral'));

      acc.chatSequence.push(aixChatMessageUser);

    } else if (dMessageRole === 'assistant') {

      const modelMessage: AixMessages_ModelMessage = { role: 'model', parts: [] };

      for (const aFragment of m.fragments) {

        if ((!isContentOrAttachmentFragment(aFragment) && !isVoidThinkingFragment(aFragment)) || aFragment.part.pt === '_pt_sentinel')
          continue;

        // aPart is a DMessageFragment['part'], and we use TS for type narrowing
        const { part: aPart, vendorState: _vnd } = aFragment;
        switch (aPart.pt) {

          case 'text':
          case 'tool_invocation':
            // Key place where the Aix Zod inferred types are compared to the Typescript defined DMessagePart* types
            // - in case of error, check that the types in `chat.fragments.ts` and `aix.wiretypes.ts` are in sync
            modelMessage.parts.push(_vnd ? { ...aPart, _vnd } : aPart);
            break;

          case 'ma':
            // Preserve reasoning continuity across turns. Three channels, any one is sufficient:
            // - Anthropic: part.textSignature / part.redactedData (bespoke fields, see Anthropic extended thinking docs)
            // - OpenAI Responses / Gemini: _vnd sidecar (reasoningItem.* / thoughtSignature, opaque continuity handle)
            // - DeepSeek V4 (OpenAI chat-completions): plain reasoning text in aText is the payload itself
            const oaiReasoning = _vnd?.openai?.reasoningItem;
            const hasReasoningHandle =
              (aPart.textSignature || aPart.redactedData?.length)
              || (oaiReasoning?.encryptedContent || oaiReasoning?.id)
              || (aPart.aText && aPart.aType === 'reasoning'); // DeepSeek V4 reasoning in plain text - NOTE: will send LOTS of 'ma' parts (e.g. to Gemini, which doesn't even need them)
            if (hasReasoningHandle) {
              const aModelAuxPart = aPart as AixParts_ModelAuxPart; // NOTE: this is a forced cast from readonly string[] to string[], but not a big deal here
              modelMessage.parts.push(_vnd ? { ...aModelAuxPart, _vnd } : aModelAuxPart);
            } else {
              // If none are present (e.g. summary-only reasoning from a vendor with no signed handle), drop the ma part silently;
              // - passing a bare reasoning reference errors out on some providers (e.g. OpenAI stateless returns "Item with id rs_... not found. ... remove this item from your input.")
              // console.log('[DEV] aixCGR_FromDMessages: dropping ma part from Assistant message as it has no reasoning handle', { aPart });
            }
            break;

          case 'doc':
            // TODO
            console.warn('aixCGR_FromDMessages: doc part from Assistant not implemented yet');
            // mMsg.parts.push(aPart);
            break;

          case 'error':
            // Note: the llm will receive the extra '[ERROR]' text; this could be optimized to handle errors better
            modelMessage.parts.push({ pt: 'text', text: `[ERROR] ${aPart.error}` });
            break;

          case 'reference':
            const refPart = aPart;
            const refPartRt = refPart.rt;
            switch (refPartRt) {

              case 'zync':
                const zt = refPart.zType;
                switch (zt) {

                  case 'asset':
                    const at = refPart.assetType;
                    switch (at) {

                      case 'image':
                        // dereference the Zync Image Asset, converting it to an inline image
                        const legacyImageRefPart = refPart._legacyImageRefPart;
                        const imageSize = legacyImageRefPart && legacyImageRefPart.dataRef.reftype === 'dblob' ? legacyImageRefPart?.dataRef?.bytesSize ?? 0 : 0;
                        const isLastAssistantMessage = _index === lastAssistantMessageIndex;
                        // older assistant images: always low-res; last assistant message: high-res only above ~400KB
                        const resizeMode = !isLastAssistantMessage ? 'openai-low-res' : imageSize > 400_000 ? 'openai-high-res' : false;
                        try {
                          const aixPart = await aixConvertZyncImageAssetRefToInlineImageOrThrow(refPart, resizeMode);
                          modelMessage.parts.push(_vnd ? { ...aixPart, _vnd } : aixPart);
                        } catch (error: any) {
                          if (IGNORE_CGR_NO_IMAGE_DEREFERENCE) console.warn(`Zync asset reference from the assistant missing in the chat generation request because: ${error?.message || error?.toString() || 'Unknown error'} - continuing without`);
                          else throw error;
                        }
                        break;

                      case 'audio':
                        // dereference the Zync Audio Asset, converting it to an inline buffer
                        throw '[DEV] audio assets from the assistant are not supported yet';

                      default:
                        const _exhaustiveCheck: never = at;
                        console.warn('aixCGR_FromDMessages: unexpected Zync asset type from the assistant', at);
                        break;
                    }
                    break;

                  default:
                    const _exhaustiveCheck: never = zt;
                    break;
                }
                break;

              case '_sentinel':
                break; // not a real case

              default:
                const _exhaustiveCheck: never = refPartRt;
                console.warn('aixCGR_FromDMessages: unexpected Assistant fragment part type', refPartRt);
            }
            break;

          case 'image_ref':
            // TODO: rescale shall be dependent on the LLM here - and be careful with the high-res options, as they can
            // be really space consuming. how to choose between high and low? global option?
            /**
             * FIXME for GEMINI IMAGE GENERATION
             * For now we upload ONLY THE LAST IMAGE as full quality, while all others are resized before transmission.
             */
            const imageSize = aPart.dataRef.reftype === 'dblob' ? aPart.dataRef?.bytesSize ?? 0 : 0;
            const isLastAssistantMessage = _index === lastAssistantMessageIndex;
            const resizeMode = !isLastAssistantMessage ? 'openai-low-res' : imageSize > 400_000 ? 'openai-high-res' : false;
            try {
              const aixPart = await aixConvertImageRefToInlineImageOrThrow(aPart, resizeMode);
              modelMessage.parts.push(_vnd ? { ...aixPart, _vnd } : aixPart);
            } catch (error: any) {
              if (IGNORE_CGR_NO_IMAGE_DEREFERENCE) console.warn(`Image from the assistant missing in the chat generation request because: ${error?.message || error?.toString() || 'Unknown error'} - continuing without`);
              else throw error;
            }
            break;

          case 'tool_response':
            // Validation of DMessageToolResponsePart of response.type: 'function_call'
            // - NOTE: for now we make the large assumption that responses are JSON objects, not arrays, not strings
            // - This was done for Gemini as the response needs to be an object; however we will need to decide:
            // TODO: decide the responses policy: do we allow only objects? if not, then what's the rule to convert objects to Gemini's inputs?
            if (isToolResponseFunctionCallPart(aPart)) {
              let resultObject: any;
              try {
                resultObject = JSON.parse(aPart.response.result);
              } catch (error: any) {
                throw new Error('[AIX validation] expecting `tool_response` to be parseable');
              }
              if (!resultObject || typeof resultObject !== 'object')
                throw new Error('[AIX validation] expecting `tool_response` to be a JSON object');
              if (Array.isArray(resultObject))
                throw new Error('[AIX validation for Gemini] expecting `tool_response` to not be an array');
            }
            modelMessage.parts.push(_vnd ? { ...aPart, _vnd } : aPart);
            break;

          case 'hosted_resource':
            // Hosted resources are download-only artifacts - emit a text placeholder for model context
            // NOTE: disabled for now - we don't know how usefult this hinting it, and we're clashing with proprietary Anthropic prompts
            // modelMessage.parts.push({
            //   pt: 'text',
            //   text: `[Output file: ${aPart.resource.via === 'anthropic' ? aPart.resource.fileId : 'unknown'}]`,
            //   // ...(aPart.resource.via === 'anthropic' && {
            //   //   _vnd: { anthropic: { containerUpload: { fileId: aPart.resource.fileId, ...(aPart.resource.containerId && { containerId: aPart.resource.containerId }) } } },
            //   // }),
            // });
            break;

          default:
            const _exhaustiveCheck: never = aPart;
            console.warn('aixCGR_FromDMessages: unexpected Assistant fragment part', aPart);
            break;
        }
      }

      if (modelMessage.parts.length > 0) {

        // (on Assistant messages) handle the ant-cache-prompt user/auto flags, on the very last message
        if (mHasAntCacheFlag)
          modelMessage.parts.push(_clientCreateAixMetaCacheControlPart('anthropic-ephemeral'));

        acc.chatSequence.push(modelMessage);
      }

    } else {

      // DEV MODE: THROW ERROR, to aid the porting efforts
      if (process.env.NODE_ENV === 'development')
        throw new Error(`[DEV] aixCGR_FromDMessages: unexpected message role ${m.role}. Please PORT the caller to the systemIntruction API change.`);

      // TODO: implement mid-chat system messages if needed
      // NOTE: the API should just disallow 'system' messages in the middle of the chat
      console.warn('[DEV] aixCGR_FromDMessages: unexpected message role', m.role);

    }

    return acc;
  }, Promise.resolve({
    systemMessage: null,
    chatSequence: [],
  } as Pick<AixAPIChatGenerate_Request, 'systemMessage' | 'chatSequence'>) /* this is the key to the new version of this function which doesn't extract system messages anymore */);

  // as promised we only return this as we only built this, and not the full CGR.
  return cgr.chatSequence;
}
|
||
|
||
|
||
/// Parts that differ from DMessage*Part to AIX
|
||
|
||
export async function aixConvertZyncImageAssetRefToInlineImageOrThrow(assetRefPart: Immutable<DMessageZyncAssetReferencePart>, resizeMode: LLMImageResizeMode | false): Promise<AixParts_InlineImagePart> {
|
||
|
||
// during transition period, use legacy fallback if available
|
||
if (assetRefPart._legacyImageRefPart)
|
||
return aixConvertImageRefToInlineImageOrThrow(assetRefPart._legacyImageRefPart, resizeMode);
|
||
|
||
// Future: Full Asset system implementation
|
||
// FIXME: [ASSET] get the Blob (local > download if missing) associated to the DAsset, pointed to by the reference, and convert it to base64 when used inline
|
||
// const asset = await zyncAssetActions().getAssetData(assetRefPart.zUuid);
|
||
// if (asset && asset.binaryData)
|
||
// return _clientCreateAixInlineImagePart(asset.binaryData.base64, asset.mimeType);
|
||
|
||
throw new Error(`Asset reference ${assetRefPart.zUuid} - Asset system not yet implemented or legacy fallback missing`);
|
||
}
|
||
|
||
export async function aixConvertImageRefToInlineImageOrThrow(imageRefPart: DMessageImageRefPart, resizeMode: LLMImageResizeMode | false): Promise<AixParts_InlineImagePart> {
|
||
|
||
// validate
|
||
const { dataRef } = imageRefPart;
|
||
if (dataRef.reftype !== 'dblob' || !('dblobAssetId' in dataRef)) {
|
||
console.warn('Image reference is not supported', imageRefPart);
|
||
throw new Error('Image reference is not supported');
|
||
}
|
||
|
||
// get image asset
|
||
const imageAsset = await getImageAsset(dataRef.dblobAssetId);
|
||
if (!imageAsset) {
|
||
console.warn('Image asset not found', imageRefPart);
|
||
throw new Error('Image asset not found');
|
||
}
|
||
|
||
// base64 -> blob conversion
|
||
let { mimeType, base64: base64Data } = imageAsset.data;
|
||
|
||
// convert if requested (with intermediate Blob transformation)
|
||
if (resizeMode) {
|
||
try {
|
||
// convert base64 -> Blob
|
||
const imageBlob = await convert_Base64WithMimeType_To_Blob(base64Data, mimeType, 'aixConvertImageRefToInlineImage');
|
||
// resize Blob
|
||
const resizedOp = await imageBlobResizeIfNeeded(imageBlob, resizeMode, MODEL_IMAGE_RESCALE_MIMETYPE, MODEL_IMAGE_RESCALE_QUALITY);
|
||
if (resizedOp) {
|
||
// if resized, convert resized Blob back to base64
|
||
base64Data = await convert_Blob_To_Base64(resizedOp.blob, 'aixConvertImageRefToInlineImage');
|
||
mimeType = resizedOp.blob.type as any;
|
||
}
|
||
} catch (resizeError) {
|
||
console.warn('[DEV] aixConvertImageRefToInlineImageOrThrow: Error resizing image:', resizeError);
|
||
// continue without resizing, as this is not critical
|
||
}
|
||
}
|
||
|
||
return _clientCreateAixInlineImagePart(base64Data, mimeType || dataRef.mimeType);
|
||
}
|
||
|
||
function _clientCreateAixInlineImagePart(base64: string, mimeType: string): AixParts_InlineImagePart {
|
||
return { pt: 'inline_image', mimeType: (mimeType || 'image/png') as AixParts_InlineImagePart['mimeType'], base64 };
|
||
}
|
||
|
||
function _clientCreateAixMetaCacheControlPart(control: AixParts_MetaCacheControl['control']): AixParts_MetaCacheControl {
|
||
return { pt: 'meta_cache_control', control: control };
|
||
}
|
||
|
||
function _clientCreateAixMetaInReferenceToPart(items: DMetaReferenceItem[]): AixParts_MetaInReferenceToPart {
|
||
return { pt: 'meta_in_reference_to', referTo: items };
|
||
}
|
||
|
||
|
||
/// Client-side hotfixes
|
||
|
||
|
||
export async function clientHotFixGenerateRequest_ApplyAll(llmInterfaces: DLLM['interfaces'], aixChatGenerate: AixAPIChatGenerate_Request, modelName: string): Promise<{
|
||
hotfixNoStream: boolean;
|
||
workaroundsCount: number;
|
||
}> {
|
||
|
||
let workaroundsCount = 0;
|
||
|
||
// Apply the remove-sys0 hot fix - at the time of doing it, Gemini Image Generation does not use the system instructions
|
||
if (llmInterfaces.includes(LLM_IF_HOTFIX_StripSys0))
|
||
workaroundsCount += clientHotFixGenerateRequest_StripSys0(aixChatGenerate);
|
||
|
||
// Apply the cast-sys0-to-usr0 hot fix (e.g. o1-preview); however this is a late-stage emergency hotfix as we expect the caller to be aware of this logic
|
||
if (llmInterfaces.includes(LLM_IF_HOTFIX_Sys0ToUsr0))
|
||
workaroundsCount += clientHotFixGenerateRequest_Sys0ToUsr0(aixChatGenerate);
|
||
|
||
// Apply the strip-images hot fix (e.g. o1-preview); however this is a late-stage emergency hotfix as we expect the caller to be aware of this logic
|
||
if (llmInterfaces.includes(LLM_IF_HOTFIX_StripImages))
|
||
workaroundsCount += clientHotFixGenerateRequest_StripImages(aixChatGenerate);
|
||
|
||
// Apply the no-webp hot fix - convert WebP images to JPEG (smaller) or PNG (lossless)
|
||
if (llmInterfaces.includes(LLM_IF_HOTFIX_NoWebP))
|
||
workaroundsCount += await clientHotFixGenerateRequest_ConvertWebP(aixChatGenerate, 'image/jpeg');
|
||
|
||
// Disable streaming for select chat models that don't support it (e.g. o1-preview (old) and o1-2024-12-17)
|
||
const hotfixNoStream = llmInterfaces.includes(LLM_IF_HOTFIX_NoStream);
|
||
|
||
if (workaroundsCount > 0)
|
||
console.warn(`[DEV] Working around '${modelName}' model limitations: client-side applied ${workaroundsCount} workarounds`);
|
||
|
||
return { hotfixNoStream, workaroundsCount };
|
||
|
||
}
|
||
|
||
|
||
/**
|
||
* Hot fix for models that don't support vision input and we need to perform the fix ahead of AIX send.
|
||
*
|
||
* Notes for the o1-2024-12-17 model:
|
||
* - we don't strip inline images, as o1 supports them
|
||
*/
|
||
function clientHotFixGenerateRequest_StripImages(aixChatGenerate: AixAPIChatGenerate_Request): number {
|
||
|
||
let workaroundsCount = 0;
|
||
|
||
// Note: other conversions that would translate to system inside the AIX Dispatch will be handled there, as we have a
|
||
// higher level representation here, where the roles are 'user', 'model', and 'tool'.
|
||
|
||
// Remove any inline images from the entire chat sequence
|
||
for (let i = 0; i < aixChatGenerate.chatSequence.length; i++) {
|
||
const message = aixChatGenerate.chatSequence[i];
|
||
|
||
// Iterate over message parts and remove inline images
|
||
for (let j = message.parts.length - 1; j >= 0; j--) {
|
||
if (message.parts[j].pt === 'inline_image') {
|
||
workaroundsCount++;
|
||
message.parts.splice(j, 1);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Log the number of workarounds applied
|
||
return workaroundsCount;
|
||
|
||
}
|
||
|
||
/**
|
||
* Hot fix for models that don't support WebP images - converts to JPEG or PNG
|
||
*/
|
||
async function clientHotFixGenerateRequest_ConvertWebP(aixChatGenerate: AixAPIChatGenerate_Request, toFormat: 'image/jpeg' | 'image/png'): Promise<number> {
|
||
|
||
let workaroundsCount = 0;
|
||
const quality = toFormat === 'image/jpeg' ? 0.92 : 1.0;
|
||
|
||
for (const message of aixChatGenerate.chatSequence) {
|
||
for (let j = 0; j < message.parts.length; j++) {
|
||
const part = message.parts[j];
|
||
if (part.pt === 'inline_image' && part.mimeType === 'image/webp') {
|
||
try {
|
||
const webpBlob = await convert_Base64WithMimeType_To_Blob(part.base64, 'image/webp', 'hotfix-no-webp');
|
||
const { blob: convertedBlob } = await imageBlobConvertType(webpBlob, toFormat, quality);
|
||
const convertedBase64 = await convert_Blob_To_Base64(convertedBlob, 'hotfix-no-webp');
|
||
message.parts[j] = { pt: 'inline_image', mimeType: toFormat, base64: convertedBase64 };
|
||
workaroundsCount++;
|
||
} catch (error) {
|
||
console.warn('[DEV] clientHotFixGenerateRequest_ConvertWebP: Error converting image:', error);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return workaroundsCount;
|
||
|
||
}
|
||
|
||
/**
|
||
* Hot fix for models that don't want the system message - e.g. Gemini Image Generation (although this may change)
|
||
*/
|
||
function clientHotFixGenerateRequest_StripSys0(aixChatGenerate: AixAPIChatGenerate_Request): number {
|
||
|
||
const workaroundsCount = aixChatGenerate.systemMessage?.parts?.length ? 1 : 0;
|
||
aixChatGenerate.systemMessage = null;
|
||
return workaroundsCount;
|
||
|
||
}
|
||
|
||
|
||
/**
|
||
* Hot fix for handling system messages in models that do not support them, such as `o1-preview`.
|
||
* -> Converts System to User messages for compatibility.
|
||
*
|
||
* Notes for the o1-2024-12-17 model:
|
||
* - we don't cast the system to user, as the aix dispatcher is casting the 'system' message to 'developer'
|
||
*/
|
||
function clientHotFixGenerateRequest_Sys0ToUsr0(aixChatGenerate: AixAPIChatGenerate_Request): number {
|
||
|
||
// Convert the main system message if it exists
|
||
if (!aixChatGenerate.systemMessage)
|
||
return 0;
|
||
|
||
// Convert system message to user message
|
||
const systemAsUser: AixMessages_UserMessage = {
|
||
role: 'user',
|
||
parts: aixChatGenerate.systemMessage.parts,
|
||
};
|
||
|
||
// Insert the converted system message at the beginning of the chat sequence (recreating the array to not alter the original)
|
||
aixChatGenerate.chatSequence = [...aixChatGenerate.chatSequence];
|
||
aixChatGenerate.chatSequence.unshift(systemAsUser);
|
||
|
||
// Remove the original system message
|
||
aixChatGenerate.systemMessage = null;
|
||
|
||
// Log the workaround applied
|
||
return 1;
|
||
|
||
} |