OpenAI: mark the prompt caching models

This commit is contained in:
Enrico Ros
2024-10-01 11:25:11 -07:00
parent 0e566edf42
commit cefe208abd
3 changed files with 18 additions and 13 deletions
+4 -1
View File
@@ -55,10 +55,12 @@ export type DModelInterfaceV1 =
| 'oai-chat-fn'
| 'oai-complete'
| 'ant-prompt-caching'
-| 'oai-o1-preview' // only append below this line
+| 'oai-o1-preview'
+| 'oai-prompt-caching' // only append below this line
;
// Model interfaces (chat, and function calls) - here as a preview, will be used more broadly in the future
// FIXME: keep this in sync with the server side on modules/llms/server/llm.server.types.ts
export const LLM_IF_OAI_Chat: DModelInterfaceV1 = 'oai-chat';
export const LLM_IF_OAI_Json: DModelInterfaceV1 = 'oai-chat-json';
export const LLM_IF_OAI_Vision: DModelInterfaceV1 = 'oai-chat-vision';
@@ -66,6 +68,7 @@ export const LLM_IF_OAI_Fn: DModelInterfaceV1 = 'oai-chat-fn';
export const LLM_IF_OAI_Complete: DModelInterfaceV1 = 'oai-complete';
export const LLM_IF_ANT_PromptCaching: DModelInterfaceV1 = 'ant-prompt-caching';
export const LLM_IF_SPECIAL_OAI_O1Preview: DModelInterfaceV1 = 'oai-o1-preview';
+export const LLM_IF_OAI_PromptCaching: DModelInterfaceV1 = 'oai-prompt-caching';
// Future changes?
// export type DModelPartKind = 'text' | 'image' | 'audio' | 'video' | 'pdf';
+3 -1
View File
@@ -1,6 +1,6 @@
import { z } from 'zod';
-import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
@@ -15,6 +15,7 @@ export type ModelDescriptionSchema = z.infer<typeof ModelDescription_schema>;
/// Interfaces
// TODO: just remove this, and move to a capabilities array (I/O/...)
// FIXME: keep this in sync with the client side on llms.types.ts
const Interface_enum = z.enum([
LLM_IF_OAI_Chat, // OpenAI Chat
LLM_IF_OAI_Fn, // JSON mode?
@@ -23,6 +24,7 @@ const Interface_enum = z.enum([
LLM_IF_OAI_Complete, // Complete mode
LLM_IF_ANT_PromptCaching, // Anthropic Prompt caching
LLM_IF_SPECIAL_OAI_O1Preview, // Special OAI O1 Preview
+LLM_IF_OAI_PromptCaching, // OpenAI Prompt caching
]);
@@ -1,6 +1,6 @@
import type { OpenAIWire_API_Models_List } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes';
-import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
+import { LLM_IF_OAI_Chat, LLM_IF_OAI_Complete, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Vision, LLM_IF_SPECIAL_OAI_O1Preview } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import { fromManualMapping, ManualMappings } from './models.data';
@@ -21,7 +21,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
benchmark: { cbaElo: 1286 },
},
@@ -33,7 +33,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json], // + Structured Outputs?
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching], // + Structured Outputs?
chatPrice: { input: 2.5, output: 10 },
benchmark: { cbaElo: 1286 + 1 },
},
@@ -44,7 +44,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
benchmark: { cbaElo: 1286 },
hidden: true,
@@ -56,7 +56,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 5, output: 15 },
},
@@ -71,7 +71,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 0.15, output: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
@@ -82,7 +82,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 16384,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 0.15, output: 0.60 },
benchmark: { cbaElo: 1277, cbaMmlu: 82.0 },
},
@@ -98,7 +98,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 15, output: 60 },
isPreview: true,
},
@@ -109,7 +109,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 32768,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 15, output: 60 },
isPreview: true,
},
@@ -125,7 +125,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 3, output: 12 },
isPreview: true,
},
@@ -136,7 +136,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
contextWindow: 128000,
maxCompletionTokens: 65536,
trainingDataCutoff: 'Oct 2023',
-interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview],
+interfaces: [LLM_IF_OAI_Chat, LLM_IF_SPECIAL_OAI_O1Preview, LLM_IF_OAI_PromptCaching],
chatPrice: { input: 3, output: 12 },
isPreview: true,
},