OpenAI: mark the prompt caching models

This commit is contained in:
Enrico Ros
2024-10-01 11:25:11 -07:00
parent 0e566edf42
commit cefe208abd
3 changed files with 18 additions and 13 deletions
+4 -1
View File
@@ -55,10 +55,12 @@ export type DModelInterfaceV1 =
| 'oai-chat-fn'
| 'oai-complete'
| 'ant-prompt-caching'
-| 'oai-o1-preview' // only append below this line
+| 'oai-o1-preview'
+| 'oai-prompt-caching' // only append below this line
;
// Model interfaces (chat, and function calls) - here as a preview, will be used more broadly in the future
// FIXME: keep this in sync with the server side on modules/llms/server/llm.server.types.ts
export const LLM_IF_OAI_Chat: DModelInterfaceV1 = 'oai-chat';
export const LLM_IF_OAI_Json: DModelInterfaceV1 = 'oai-chat-json';
export const LLM_IF_OAI_Vision: DModelInterfaceV1 = 'oai-chat-vision';
@@ -66,6 +68,7 @@ export const LLM_IF_OAI_Fn: DModelInterfaceV1 = 'oai-chat-fn';
export const LLM_IF_OAI_Complete: DModelInterfaceV1 = 'oai-complete';
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
export const LLM_IF_SPECIAL_OAI_O1Preview: DModelInterfaceV1 = 'oai-o1-preview';
+export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
// Future changes?
// export type DModelPartKind = 'text' | 'image' | 'audio' | 'video' | 'pdf';
+3 -1
View File
@@ -1,6 +1,6 @@
import { z } from 'zod';
-import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
@@ -15,6 +15,7 @@ export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
/// Interfaces
// TODO: just remove this, and move to a capabilities array (I/O/...)
// FIXME: keep this in sync with the client side on llms.types.ts
const Interface_enum = z.enum([
LLM_IF_OAI_Chat, // OpenAI Chat
LLM_IF_OAI_Fn, // JSON mode?
@@ -23,6 +24,7 @@ const Interface_enum = z.enum([
LLM_IF_OAI_Complete, // Complete mode
LLM_IF_ANT_PromptCaching, // Anthropic Prompt caching
LLM_IF_SPECIAL_OAI_O1Preview, // Special OAI O1 Preview
+LLM_IF_OAI_PromptCaching, // OpenAI Prompt caching
]);
@@ -1,6 +1,6 @@
import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
-import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
@@ -21,7 +21,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
benchmark: { cbaElo: 1286 },
},
@@ -33,7 +33,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json], // + Structured Outputs?
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // + Structured Outputs?
chatPrice: { input: 2.5, output: 10 },
benchmark: { cbaElo: 1286 + 1 },
},
@@ -44,7 +44,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
benchmark: { cbaElo: 1286 },
hidden: true,
@@ -56,7 +56,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
},
@@ -71,7 +71,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 0.15, output: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
@@ -82,7 +82,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 0.15, output: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
@@ -98,7 +98,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 15, output: 60 },
isPreview: true,
},
@@ -109,7 +109,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 15, output: 60 },
isPreview: true,
},
@@ -125,7 +125,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 3, output: 12 },
isPreview: true,
},
@@ -136,7 +136,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 3, output: 12 },
isPreview: true,
},