From 239e332a28a772fbbaef89bbce985405f2b1eebe Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Wed, 24 Jul 2024 01:42:10 -0700 Subject: [PATCH] FF: fix resource usage parsing --- .../chatGenerate/parsers/openai.parser.ts | 14 +++++++++++++- .../dispatch/wiretypes/openai.wiretypes.ts | 16 +++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts index 094da8e32..3bbf2f1ef 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts @@ -52,7 +52,9 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct return function(pt: ChatGenerateTransmitter, eventData: string) { // Throws on malformed event data - const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(JSON.parse(eventData)); + // ```Can you extend the Zod chunk response object parsing (all optional) to include the missing data? The following is an example of the object I received:``` + const parsedData = JSON.parse(eventData); // this is here just for ease of breakpoint, otherwise it could be inlined + const json = OpenAIWire_API_Chat_Completions.ChunkResponse_schema.parse(parsedData); // -> Model if (!hasBegun && json.model) { @@ -85,6 +87,16 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct if (!json.choices.length) return; } + // [Groq] -> Stats + if (json.x_groq?.usage) { + const { prompt_tokens, completion_tokens, completion_time, total_time } = json.x_groq.usage; + pt.setCounters({ + chatIn: prompt_tokens, + chatOut: completion_tokens, + chatOutRate: (completion_tokens && completion_time) ? 
Math.round((completion_tokens / completion_time) * 100) / 100 : undefined, + chatTimeInner: completion_time, + }); + } // expect: 1 completion, or stop if (json.choices.length !== 1) diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 6df8d3095..0e0d58a1f 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -380,9 +380,23 @@ export namespace OpenAIWire_API_Chat_Completions { .nullable(), // [Grow, undocumented OpenAI] fingerprint is null on some OpenAI examples too // service_tier: z.unknown().optional(), - // undocumented streaming messages + // [OpenAI] undocumented streaming messages error: _UndocumentedError_schema.optional(), warning: _UndocumentedWarning_schema.optional(), + + // [Groq] undocumented statistics message + x_groq: z.object({ + id: z.string().optional(), + usage: z.object({ + queue_time: z.number().optional(), + prompt_tokens: z.number().optional(), + prompt_time: z.number().optional(), + completion_tokens: z.number().optional(), + completion_time: z.number().optional(), + total_tokens: z.number().optional(), + total_time: z.number().optional(), + }).optional(), + }).optional(), }); }