From 9bfcb50735a1bf47b8ab17afebedede4ae2865c5 Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Sun, 29 Dec 2024 23:54:54 -0800 Subject: [PATCH] DeepSeek: cache pricing support --- .../dispatch/chatGenerate/parsers/openai.parser.ts | 10 ++++++++++ .../aix/server/dispatch/wiretypes/openai.wiretypes.ts | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts index 312904967..731b7de71 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/openai.parser.ts @@ -339,6 +339,16 @@ function _fromOpenAIUsage(usage: OpenAIWire_API_Chat_Completions.Response['usage } } + // [DeepSeek] Input redistribution: Cache Read + if (usage.prompt_cache_hit_tokens !== undefined) { + const TCacheRead = usage.prompt_cache_hit_tokens; + if (TCacheRead > 0) { + metricsUpdate.TCacheRead = TCacheRead; + if (usage.prompt_cache_miss_tokens !== undefined) + metricsUpdate.TIn = usage.prompt_cache_miss_tokens; + } + } + // TODO Input redistribution: Audio tokens // Output Metrics diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts index 82660bed5..81dadbf06 100644 --- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts @@ -384,6 +384,10 @@ export namespace OpenAIWire_API_Chat_Completions { accepted_prediction_tokens: z.number().optional(), // [OpenAI, 2024-11-05] Predicted Outputs rejected_prediction_tokens: z.number().optional(), // [OpenAI, 2024-11-05] Predicted Outputs }).optional(), // not present in other APIs yet + + // [DeepSeek, 2024-08-02] context caching on disk + prompt_cache_hit_tokens: z.number().optional(), + prompt_cache_miss_tokens: z.number().optional(), }).nullable(); const Choice_NS_schema = z.object({