From b41f930d086d6c2d299a26f227285994434fef49 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Sat, 15 Mar 2025 01:19:41 -0700 Subject: [PATCH] Gemini: improve response parsing (excl. candidates) --- .../dispatch/wiretypes/gemini.wiretypes.ts | 68 +++++++++++++++---- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts index 4a983e8aa..ed223ff5e 100644 --- a/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/gemini.wiretypes.ts @@ -48,6 +48,15 @@ export namespace GeminiWire_ContentParts { 'text/rtf', ]); + export const ContentPartModality_enum = z.enum([ + 'MODALITY_UNSPECIFIED', + 'TEXT', // plain text + 'IMAGE', + 'VIDEO', + 'AUDIO', + 'DOCUMENT', // e.g. PDF + ]); + /// Content parts - Input export const TextPart_schema = z.object({ @@ -291,16 +300,18 @@ export namespace GeminiWire_ToolDeclarations { export namespace GeminiWire_Safety { - /// Rating + /// Safety Rating export const HarmCategory_enum = z.enum([ 'HARM_CATEGORY_UNSPECIFIED', + // PaLM-only classifications: 'HARM_CATEGORY_DEROGATORY', 'HARM_CATEGORY_TOXICITY', 'HARM_CATEGORY_VIOLENCE', 'HARM_CATEGORY_SEXUAL', 'HARM_CATEGORY_MEDICAL', 'HARM_CATEGORY_DANGEROUS', + // Gemini classifications: 'HARM_CATEGORY_HARASSMENT', 'HARM_CATEGORY_HATE_SPEECH', 'HARM_CATEGORY_SEXUALLY_EXPLICIT', @@ -328,18 +339,19 @@ export namespace GeminiWire_Safety { export type HarmBlockThreshold = z.infer; export const HarmBlockThreshold_enum = z.enum([ 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', - 'BLOCK_LOW_AND_ABOVE', - 'BLOCK_MEDIUM_AND_ABOVE', - 'BLOCK_ONLY_HIGH', // Content with NEGLIGIBLE, LOW, and MEDIUM will be allowed. - 'BLOCK_NONE', // All content will be allowed. 
+ 'BLOCK_LOW_AND_ABOVE', // allows NEGLIGIBLE + 'BLOCK_MEDIUM_AND_ABOVE', // allows NEGLIGIBLE, LOW + 'BLOCK_ONLY_HIGH', // allows NEGLIGIBLE, LOW, MEDIUM + 'BLOCK_NONE', // allows all /** * 2025-01-10: see bug #720 and https://discuss.ai.google.dev/t/flash-2-0-doesnt-respect-block-none-on-all-harm-categories/59352/1 */ - 'OFF', // Turn off the safety filter. + 'OFF', // turns off the safety filter. ]); export const SafetySetting_schema = z.object({ category: HarmCategory_enum, + /** Block at and beyond a specified harm probability. */ threshold: HarmBlockThreshold_enum, }); @@ -355,7 +367,9 @@ export namespace GeminiWire_Safety { ]); export const PromptFeedback_schema = z.object({ + /** Optional. If set, the prompt was blocked and no candidates are returned. */ blockReason: BlockReason_enum.optional(), + /** At most one rating per category. */ safetyRatings: z.array(SafetyRating_schema), }); @@ -552,13 +566,41 @@ export namespace GeminiWire_API_Generate_Content { // logprobsResult: LogprobsResult_schema.optional(), }); - const UsageMetadata_schema = z.object({ - promptTokenCount: z.number(), - candidatesTokenCount: z.number().optional(), // .optional: in case the first message is 'RECITATION' there could be no output token count - // totalTokenCount: z.number(), - // cachedContentTokenCount: z.number().optional(), // Not supported for now, hence disabled + + const ModalityTokenCount_schema = z.object({ + modality: GeminiWire_ContentParts.ContentPartModality_enum, + tokenCount: z.number(), }); + const UsageMetadata_schema = z.object({ + // effective prompt size, including tokens in the cached content + promptTokenCount: z.number(), + + // (usually there: missing on first packets, or 'RECITATION' answers) total tokens across all the generated candidates + candidatesTokenCount: z.number().optional(), + + // (never missing, but optional for future safety) total tokens for the whole request (prompt + all generated candidates) + // if candidatesTokenCount is missing, this is = 
promptTokenCount + totalTokenCount: z.number().optional(), + + // Input parts + // (optional: only if caching) tokens in the cached part of the prompt (the cached content) + cachedContentTokenCount: z.number().optional(), + // (optional: only if tool usage) tokens in tool-use prompt(s) + toolUsePromptTokenCount: z.number().optional(), + + // Output parts + // (optional: only for thinking models - and not in all packets) tokens of thoughts for thinking models + thoughtsTokenCount: z.number().optional(), + + // Modality breakdowns - mostly commented out because we don't want to spend energy parsing them for now (we don't use them) + promptTokensDetails: z.array(ModalityTokenCount_schema).optional(), + // cacheTokensDetails: z.array(ModalityTokenCount_schema).optional(), + // candidatesTokensDetails: z.array(ModalityTokenCount_schema).optional(), + // toolUsePromptTokensDetails: z.array(ModalityTokenCount_schema).optional(), + }); + + export type Response = z.infer; export const Response_schema = z.object({ candidates: z.array(Candidate_schema) @@ -569,9 +611,7 @@ export namespace GeminiWire_API_Generate_Content { * Note: seems to be present on all packets now, so we're commending the .optional() */ usageMetadata: UsageMetadata_schema, // .optional() - /** - * The real model version used to generate the response (what we got, not what we asked for). - */ + /** Real model version used to generate the response (what we got, not what we asked for). */ modelVersion: z.string(), });