From cefe208abd0bf3d8e17579e720982f9a39e372f1 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Tue, 1 Oct 2024 11:25:11 -0700
Subject: [PATCH] OpenAI: mark the prompt caching models

---
 src/common/stores/llms/llms.types.ts        |  5 ++++-
 src/modules/llms/server/llm.server.types.ts |  4 +++-
 .../server/openai/models/openai.models.ts   | 22 +++++++++----------
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/common/stores/llms/llms.types.ts b/src/common/stores/llms/llms.types.ts
index a4f907ad1..03825f558 100644
--- a/src/common/stores/llms/llms.types.ts
+++ b/src/common/stores/llms/llms.types.ts
@@ -55,10 +55,12 @@ export type DModelInterfaceV1 =
   | 'oai-chat-fn'
   | 'oai-complete'
   | 'ant-prompt-caching'
-  | 'oai-o1-preview' // only append below this line
+  | 'oai-o1-preview'
+  | 'oai-prompt-caching' // only append below this line
   ;
 
 // Model interfaces (chat, and function calls) - here as a preview, will be used more broadly in the future
+// FIXME: keep this in sync with the server side on modules/llms/server/llm.server.types.ts
 export const LLM_IF_OAI_Chat: DModelInterfaceV1 = 'oai-chat';
 export const LLM_IF_OAI_Json: DModelInterfaceV1 = 'oai-chat-json';
 export const LLM_IF_OAI_Vision: DModelInterfaceV1 = 'oai-chat-vision';
@@ -66,6 +68,7 @@ export const LLM_IF_OAI_Fn: DModelInterfaceV1 = 'oai-chat-fn';
 export const LLM_IF_OAI_Complete: DModelInterfaceV1 = 'oai-complete';
 export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
 export const LLM_IF_SPECIAL_OAI_O1Preview: DModelInterfaceV1 = 'oai-o1-preview';
+export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
 
 // Future changes?
 // export type DModelPartKind = 'text' | 'image' | 'audio' | 'video' | 'pdf';

diff --git a/src/modules/llms/server/llm.server.types.ts b/src/modules/llms/server/llm.server.types.ts
index 112a9a5ad..3af233244 100644
--- a/src/modules/llms/server/llm.server.types.ts
+++ b/src/modules/llms/server/llm.server.types.ts
@@ -1,6 +1,6 @@
 import { z } from 'zod';
 
-import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
 
 export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
 
@@ -15,6 +15,7 @@ export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
 /// Interfaces
 
 // TODO: just remove this, and move to a capabilities array (I/O/...)
+// FIXME: keep this in sync with the client side on llms.types.ts
 const Interface_enum = z.enum([
   LLM_IF_OAI_Chat, // OpenAI Chat
   LLM_IF_OAI_Fn, // JSON mode?
@@ -23,6 +24,7 @@ const Interface_enum = z.enum([
   LLM_IF_OAI_Complete, // Complete mode
   LLM_IF_ANT_PromptCaching, // Anthropic Prompt caching
   LLM_IF_SPECIAL_OAI_O1Preview, // Special OAI O1 Preview
+  LLM_IF_OAI_PromptCaching, // OpenAI Prompt caching
 ]);
 
 

diff --git a/src/modules/llms/server/openai/models/openai.models.ts b/src/modules/llms/server/openai/models/openai.models.ts
index a92372bcc..daf34377f 100644
--- a/src/modules/llms/server/openai/models/openai.models.ts
+++ b/src/modules/llms/server/openai/models/openai.models.ts
@@ -1,6 +1,6 @@
 import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
 
-import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
 
 import type { ModelDescriptionSchema } from '../../llm.server.types';
 import { fromManualMapping, ManualMappings } from './models.data';
@@ -21,7 +21,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 4096,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 5, output: 15 },
     benchmark: { cbaElo: 1286 },
   },
@@ -33,7 +33,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 16384,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json], // + Structured Outputs?
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // + Structured Outputs?
     chatPrice: { input: 2.5, output: 10 },
     benchmark: { cbaElo: 1286 + 1 },
   },
@@ -44,7 +44,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 4096,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 5, output: 15 },
     benchmark: { cbaElo: 1286 },
     hidden: true,
@@ -56,7 +56,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 16384,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 5, output: 15 },
   },
 
@@ -71,7 +71,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 16384,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 0.15, output: 0.60 },
     benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
   },
@@ -82,7 +82,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 16384,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 0.15, output: 0.60 },
     benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
   },
@@ -98,7 +98,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 32768,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 15, output: 60 },
     isPreview: true,
   },
@@ -109,7 +109,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 32768,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 15, output: 60 },
     isPreview: true,
   },
@@ -125,7 +125,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 65536,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 3, output: 12 },
     isPreview: true,
   },
@@ -136,7 +136,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 128000,
     maxCompletionTokens: 65536,
     trainingDataCutoff: 'Oct 2023',
-    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+    interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
     chatPrice: { input: 3, output: 12 },
     isPreview: true,
   },
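
Note for reviewers: this patch only tags the model descriptions; nothing consumes the new flag yet. A minimal sketch of the intended consumption follows. The `modelSupportsOAIPromptCaching` helper and the inline `{ interfaces: ... }` shape are hypothetical, not part of this patch; only `LLM_IF_OAI_PromptCaching`, `DModelInterfaceV1`, and the `interfaces` arrays come from the files changed above.

import { LLM_IF_OAI_PromptCaching, type DModelInterfaceV1 } from '~/common/stores/llms/llms.types';

// Hypothetical helper (not in this patch): true when a model description
// advertises OpenAI prompt caching via its `interfaces` array. OpenAI
// applies caching automatically server-side, so a check like this would
// mainly drive UI capability badges or discounted input-price estimates.
export function modelSupportsOAIPromptCaching(model: { interfaces: DModelInterfaceV1[] }): boolean {
  return model.interfaces.includes(LLM_IF_OAI_PromptCaching);
}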