From b11cac4328ec57b8122d525cee240fbd8d7e7b60 Mon Sep 17 00:00:00 2001
From: Enrico Ros <enrico.ros@gmail.com>
Date: Thu, 29 Jan 2026 22:13:04 -0800
Subject: [PATCH] LLMs: sync CB

---
 .../llms/server/anthropic/anthropic.models.ts | 25 +++++-----
 .../llms/server/gemini/gemini.models.ts       | 24 ++++-----
 .../server/openai/models/deepseek.models.ts   |  4 +-
 .../server/openai/models/mistral.models.ts    | 12 ++---
 .../server/openai/models/moonshot.models.ts   |  8 +--
 .../server/openai/models/openai.models.ts     | 49 ++++++++++---------
 .../llms/server/openai/models/xai.models.ts   | 16 +++---
 7 files changed, 70 insertions(+), 68 deletions(-)

diff --git a/src/modules/llms/server/anthropic/anthropic.models.ts b/src/modules/llms/server/anthropic/anthropic.models.ts
index d99ba83f1..dc7952432 100644
--- a/src/modules/llms/server/anthropic/anthropic.models.ts
+++ b/src/modules/llms/server/anthropic/anthropic.models.ts
@@ -37,6 +37,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
     description: 'Claude Opus 4.5 with extended thinking mode for complex reasoning and agentic workflows',
     interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
     parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAntSkills' }],
+    benchmark: { cbaElo: 1468 }, // claude-opus-4-5-20251101-thinking-32k
     maxCompletionTokens: 32000,
   },
 
@@ -47,7 +48,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
     maxCompletionTokens: 64000,
     interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
     parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntSkills' }],
-    benchmark: { cbaElo: 1451 + 1 }, // FALLBACK-UNTIL-AVAILABLE: claude-opus-4-1-20250805-thinking-16k + 1
+    benchmark: { cbaElo: 1450 }, // claude-sonnet-4-5-20250929-thinking-32k
   },
 
   'claude-haiku-4-5-20251001': {
@@ -67,7 +68,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
     maxCompletionTokens: 32000,
     interfaces: IF_4_R,
     parameterSpecs: ANT_PAR_WEB_THINKING,
-    benchmark: { cbaElo: 1451 }, // claude-opus-4-1-20250805-thinking-16k
+    benchmark: { cbaElo: 1448 }, // claude-opus-4-1-20250805-thinking-16k
   },
 
   // Claude 4 models with thinking variants
@@ -79,7 +80,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
     maxCompletionTokens: 32000,
     interfaces: IF_4_R,
     parameterSpecs: ANT_PAR_WEB_THINKING,
-    benchmark: { cbaElo: 1420 }, // claude-opus-4-20250514-thinking-16k
+    benchmark: { cbaElo: 1424 }, // claude-opus-4-20250514-thinking-16k
   },
 
   'claude-sonnet-4-20250514': {
@@ -100,7 +101,7 @@ const _hardcodedAnthropicVariants: ModelVariantMap = {
     maxCompletionTokens: 64000,
     interfaces: IF_4_R,
     parameterSpecs: ANT_PAR_WEB_THINKING,
-    benchmark: { cbaElo: 1385 }, // claude-3-7-sonnet-20250219-thinking-32k
+    benchmark: { cbaElo: 1389 }, // claude-3-7-sonnet-20250219-thinking-32k
   },
 
 } as const;
@@ -122,6 +123,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
     parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAntEffort' }],
     chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
+    benchmark: { cbaElo: 1466 }, // claude-opus-4-5-20251101
   },
   {
     id: 'claude-sonnet-4-5-20250929', // Active
@@ -143,7 +145,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
         duration: 300,
       },
     },
-    benchmark: { cbaElo: 1438 + 1 }, // FALLBACK-UNTIL-AVAILABLE: claude-opus-4-1-20250805 + 1
+    benchmark: { cbaElo: 1450 }, // claude-sonnet-4-5-20250929
   },
   {
     id: 'claude-haiku-4-5-20251001', // Active
@@ -154,6 +156,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: IF_4,
     parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAntSkills' }],
     chatPrice: { input: 1, output: 5, cache: { cType: 'ant-bp', read: 0.10, write: 1.25, duration: 300 } },
+    benchmark: { cbaElo: 1403 }, // claude-haiku-4-5-20251001
   },
 
   // Claude 4.1 models
@@ -166,7 +169,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: IF_4,
     parameterSpecs: ANT_PAR_WEB,
     chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
-    benchmark: { cbaElo: 1438 }, // claude-opus-4-1-20250805
+    benchmark: { cbaElo: 1445 }, // claude-opus-4-1-20250805
   },
 
   // Claude 4 models
@@ -180,7 +183,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: IF_4,
     parameterSpecs: ANT_PAR_WEB,
     chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
-    benchmark: { cbaElo: 1411 }, // claude-opus-4-20250514
+    benchmark: { cbaElo: 1414 }, // claude-opus-4-20250514
   },
   {
     id: 'claude-sonnet-4-20250514', // Active
@@ -202,7 +205,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
         duration: 300,
       },
     },
-    benchmark: { cbaElo: 1386 }, // claude-sonnet-4-20250514
+    benchmark: { cbaElo: 1390 }, // claude-sonnet-4-20250514
   },
 
   // Claude 3.7 models
@@ -215,7 +218,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: IF_4,
     parameterSpecs: ANT_PAR_WEB,
     chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
-    benchmark: { cbaElo: 1369 }, // claude-3-7-sonnet-20250219
+    benchmark: { cbaElo: 1372 }, // claude-3-7-sonnet-20250219
     hidden: true, // deprecated
     isLegacy: true,
   },
@@ -232,7 +235,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     interfaces: IF_4,
     parameterSpecs: ANT_PAR_WEB,
     chatPrice: { input: 0.80, output: 4.00, cache: { cType: 'ant-bp', read: 0.08, write: 1.00, duration: 300 } },
-    benchmark: { cbaElo: 1319, cbaMmlu: 75.2 }, // claude-3-5-haiku-20241022
+    benchmark: { cbaElo: 1324 }, // claude-3-5-haiku-20241022
     hidden: true, // deprecated
     isLegacy: true,
   },
@@ -248,7 +251,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
     maxCompletionTokens: 4096,
     interfaces: IF_4,
     chatPrice: { input: 0.25, output: 1.25, cache: { cType: 'ant-bp', read: 0.03, write: 0.30, duration: 300 } },
-    benchmark: { cbaElo: 1263, cbaMmlu: 75.1 },
+    benchmark: { cbaElo: 1262 }, // claude-3-haiku-20240307
   },
 
   // Legacy/Retired models
diff --git a/src/modules/llms/server/gemini/gemini.models.ts b/src/modules/llms/server/gemini/gemini.models.ts
index 84ff6cc01..88525216a 100644
--- a/src/modules/llms/server/gemini/gemini.models.ts
+++ b/src/modules/llms/server/gemini/gemini.models.ts
@@ -172,7 +172,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiGoogleSearch' },
       // { paramId: 'llmVndGeminiComputerUse' }, // we don't have the logic to handle this yet
     ],
-    benchmark: { cbaElo: 1490 }, // gemini-3-pro
+    benchmark: { cbaElo: 1487 }, // gemini-3-pro
   },
 
   // 3.0 Pro Image Preview - Released November 20, 2025
@@ -221,7 +221,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiGoogleSearch' },
       // { paramId: 'llmVndGeminiComputerUse' }, // we don't have the logic to handle this yet
     ],
-    benchmark: { cbaElo: 1480 }, // gemini-3-flash
+    benchmark: { cbaElo: 1471 }, // gemini-3-flash
   },
 
   /// Generation 2.5
@@ -237,7 +237,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiThinkingBudget', rangeOverride: [128, 32768] /* does not support 0 which would turn thinking off */ },
       { paramId: 'llmVndGeminiGoogleSearch' },
     ],
-    benchmark: { cbaElo: 1451 }, // gemini-2.5-pro
+    benchmark: { cbaElo: 1450 }, // gemini-2.5-pro
   },
 
   // REMOVED MODELS (no longer returned by API as of Jan 8, 2026):
@@ -289,7 +289,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiThinkingBudget' },
       { paramId: 'llmVndGeminiGoogleSearch' },
     ],
-    benchmark: { cbaElo: 1406 + 2 }, // gemini-2.5-flash-preview-09-2025 - the +2 is to be on top of the non-preview 2.5-flash (1407)
+    benchmark: { cbaElo: 1405 }, // gemini-2.5-flash-preview-09-2025
   },
   // 2.5 Flash
   {
@@ -303,7 +303,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiThinkingBudget' },
       { paramId: 'llmVndGeminiGoogleSearch' },
     ],
-    benchmark: { cbaElo: 1407 }, // gemini-2.5-flash (updated from CSV)
+    benchmark: { cbaElo: 1409 }, // gemini-2.5-flash
   },
 
   // REMOVED MODELS (no longer returned by API as of Nov 20, 2025):
@@ -398,7 +398,7 @@ const _knownGeminiModels: ({
       { paramId: 'llmVndGeminiThinkingBudget' },
       { paramId: 'llmVndGeminiGoogleSearch' },
     ],
-    benchmark: { cbaElo: 1380 }, // gemini-2.5-flash-lite-preview-09-2025 (no-thinking variant)
+    benchmark: { cbaElo: 1379 }, // gemini-2.5-flash-lite-preview-09-2025-no-thinking
   },
   // 2.5 Flash-Lite - Released July 2025
   {
@@ -463,7 +463,7 @@ const _knownGeminiModels: ({
     chatPrice: gemini20FlashPricing,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_GEM_CodeExecution],
     parameterSpecs: [{ paramId: 'llmVndGeminiGoogleSearch' }],
-    benchmark: { cbaElo: 1360 }, // gemini-2.0-flash-001
+    benchmark: { cbaElo: 1361 }, // gemini-2.0-flash-001
   },
   {
     id: 'models/gemini-2.0-flash',
@@ -473,7 +473,7 @@ const _knownGeminiModels: ({
     chatPrice: gemini20FlashPricing,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_GEM_CodeExecution],
     parameterSpecs: [{ paramId: 'llmVndGeminiGoogleSearch' }],
-    benchmark: { cbaElo: 1360 }, // gemini-2.0-flash
+    benchmark: { cbaElo: 1361 }, // gemini-2.0-flash
   },
 
   // 2.0 Flash Lite
@@ -529,7 +529,7 @@ const _knownGeminiModels: ({
     isPreview: true,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
     chatPrice: geminiExpFree, // Free tier only according to pricing page
-    benchmark: { cbaElo: 1311 }, // Estimating based on comparable models
+    benchmark: { cbaElo: 1319 }, // gemma-3n-e4b-it
   },
   {
     id: 'models/gemma-3n-e2b-it',
@@ -547,7 +547,7 @@ const _knownGeminiModels: ({
     isPreview: true,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
     chatPrice: geminiExpFree, // Pricing page indicates free tier only
-    benchmark: { cbaElo: 1341 },
+    benchmark: { cbaElo: 1365 }, // gemma-3-27b-it
     // hidden: true, // Keep visible if it's a distinct offering
   },
   {
@@ -556,7 +556,7 @@ const _knownGeminiModels: ({
     isPreview: true,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
     chatPrice: geminiExpFree,
-    benchmark: { cbaElo: 1321 },
+    benchmark: { cbaElo: 1342 }, // gemma-3-12b-it
   },
   {
     hidden: true, // keep larger model
@@ -564,7 +564,7 @@ const _knownGeminiModels: ({
     isPreview: true,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_HOTFIX_StripImages, LLM_IF_HOTFIX_Sys0ToUsr0],
     chatPrice: geminiExpFree,
-    benchmark: { cbaElo: 1275 },
+    benchmark: { cbaElo: 1303 }, // gemma-3-4b-it
   },
   {
     hidden: true, // keep larger model
diff --git a/src/modules/llms/server/openai/models/deepseek.models.ts b/src/modules/llms/server/openai/models/deepseek.models.ts
index 66cf8bb38..b606d4ede 100644
--- a/src/modules/llms/server/openai/models/deepseek.models.ts
+++ b/src/modules/llms/server/openai/models/deepseek.models.ts
@@ -19,7 +19,7 @@ const _knownDeepseekChatModels: ManualMappings = [
     interfaces: [...IF_3, LLM_IF_OAI_Reasoning],
     maxCompletionTokens: 32768, // default, max: 65536
     chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
-    benchmark: { cbaElo: 1418 }, // deepseek-r1-0528
+    benchmark: { cbaElo: 1412 }, // deepseek-v3.2-exp-thinking
   },
   {
     idPrefix: 'deepseek-chat',
@@ -29,7 +29,7 @@ const _knownDeepseekChatModels: ManualMappings = [
     interfaces: IF_3,
     maxCompletionTokens: 8192, // default is 4096, max is 8192
     chatPrice: { input: 0.28, output: 0.42, cache: { cType: 'oai-ac', read: 0.028 } },
-    benchmark: { cbaElo: 1419 }, // deepseek-v3.1-thinking
+    benchmark: { cbaElo: 1420 }, // deepseek-v3.2
   },
 ];
 
diff --git a/src/modules/llms/server/openai/models/mistral.models.ts b/src/modules/llms/server/openai/models/mistral.models.ts
index 370e31937..69173c563 100644
--- a/src/modules/llms/server/openai/models/mistral.models.ts
+++ b/src/modules/llms/server/openai/models/mistral.models.ts
@@ -25,16 +25,16 @@ const _knownMistralModelDetails: Record<string, {
 }> = {
 
   // Premier models - Mistral 3 (Dec 2025)
-  'mistral-large-2512': { chatPrice: { input: 0.5, output: 1.5 } }, // Mistral Large 3 - MoE 41B active / 675B total
+  'mistral-large-2512': { chatPrice: { input: 0.5, output: 1.5 }, benchmark: { cbaElo: 1414 } }, // Mistral Large 3 - MoE 41B active / 675B total
   'mistral-large-2411': { chatPrice: { input: 2, output: 6 }, benchmark: { cbaElo: 1305 }, hidden: true }, // older version
   'mistral-large-latest': { chatPrice: { input: 0.5, output: 1.5 }, hidden: true }, // → 2512
 
-  'mistral-medium-2508': { chatPrice: { input: 0.4, output: 2 } }, // Mistral Medium 3
-  'mistral-medium-2505': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1383 }, hidden: true }, // older version
+  'mistral-medium-2508': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1412 } }, // Mistral Medium 3
+  'mistral-medium-2505': { chatPrice: { input: 0.4, output: 2 }, benchmark: { cbaElo: 1384 }, hidden: true }, // older version
   'mistral-medium-latest': { chatPrice: { input: 0.4, output: 2 }, hidden: true }, // → 2508
   'mistral-medium': { chatPrice: { input: 0.4, output: 2 }, hidden: true }, // symlink
 
-  'magistral-medium-2509': { chatPrice: { input: 2, output: 5 } }, // reasoning
+  'magistral-medium-2509': { chatPrice: { input: 2, output: 5 }, benchmark: { cbaElo: 1305 } }, // reasoning
   'magistral-medium-latest': { chatPrice: { input: 2, output: 5 }, hidden: true }, // symlink
 
   'devstral-2512': { label: 'Devstral 2 (2512)', chatPrice: { input: 0.4, output: 2 } }, // Devstral 2 - 123B coding agents (API returns "Mistral Vibe Cli")
@@ -61,7 +61,7 @@ const _knownMistralModelDetails: Record<string, {
   'ministral-14b-latest': { chatPrice: { input: 0.2, output: 0.2 }, hidden: true }, // symlink
 
   'ministral-8b-2512': { chatPrice: { input: 0.15, output: 0.15 } }, // Ministral 3 8B
-  'ministral-8b-2410': { chatPrice: { input: 0.1, output: 0.1 }, benchmark: { cbaElo: 1240 }, hidden: true }, // older version
+  'ministral-8b-2410': { chatPrice: { input: 0.1, output: 0.1 }, benchmark: { cbaElo: 1237 }, hidden: true }, // older version
   'ministral-8b-latest': { chatPrice: { input: 0.15, output: 0.15 }, hidden: true }, // symlink
 
   'ministral-3b-2512': { chatPrice: { input: 0.1, output: 0.1 } }, // Ministral 3 3B
@@ -69,7 +69,7 @@ const _knownMistralModelDetails: Record<string, {
   'ministral-3b-latest': { chatPrice: { input: 0.1, output: 0.1 }, hidden: true }, // symlink
 
   // Open models
-  'mistral-small-2506': { chatPrice: { input: 0.1, output: 0.3 } }, // Mistral Small 3.2
+  'mistral-small-2506': { chatPrice: { input: 0.1, output: 0.3 }, benchmark: { cbaElo: 1356 } }, // Mistral Small 3.2
   'mistral-small-latest': { chatPrice: { input: 0.1, output: 0.3 }, hidden: true }, // symlink
 
   'labs-mistral-small-creative': { label: 'Mistral Small Creative', chatPrice: { input: 0.1, output: 0.3 } }, // creative writing, roleplay (Labs)
diff --git a/src/modules/llms/server/openai/models/moonshot.models.ts b/src/modules/llms/server/openai/models/moonshot.models.ts
index 6543a42e4..4160c1c10 100644
--- a/src/modules/llms/server/openai/models/moonshot.models.ts
+++ b/src/modules/llms/server/openai/models/moonshot.models.ts
@@ -38,7 +38,7 @@ const _knownMoonshotModels: ManualMappings = [
     interfaces: IF_K2_5,
     chatPrice: { input: 0.60, output: 3.00, cache: { cType: 'oai-ac', read: 0.10 } },
     parameterSpecs: [{ paramId: 'llmVndMoonReasoningEffort' }],
-    benchmark: { cbaElo: 1417 + 2 }, // to be at the top
+    benchmark: { cbaElo: 1450 }, // kimi-k2.5-thinking
   },
 
   // Kimi K2 Series - Latest Models
@@ -52,7 +52,7 @@ const _knownMoonshotModels: ManualMappings = [
     maxCompletionTokens: 65536,
     interfaces: IF_K2_REASON,
     chatPrice: { input: 1.15, output: 8.00, cache: { cType: 'oai-ac', read: 0.15 } },
-    benchmark: { cbaElo: 1417 + 1 }, // UNKNOWN +1 over 0905, but don't want to be above the non-turbo
+    benchmark: { cbaElo: 1429 }, // kimi-k2-thinking-turbo
     // parameterSpecs: [{ paramId: 'llmVndMoonshotWebSearch' }], // NOT WORKING YET
   },
   // Thinking
@@ -78,7 +78,7 @@ const _knownMoonshotModels: ManualMappings = [
     interfaces: IF_K2,
     chatPrice: { input: 0.60, output: 2.50, cache: { cType: 'oai-ac', read: 0.15 } },
     isPreview: true,
-    benchmark: { cbaElo: 1417 },
+    benchmark: { cbaElo: 1418 }, // kimi-k2-0905-preview
     // parameterSpecs: [{ paramId: 'llmVndMoonshotWebSearch' }],
   },
   {
@@ -91,7 +91,7 @@ const _knownMoonshotModels: ManualMappings = [
     interfaces: IF_K2,
     chatPrice: { input: 0.60, output: 2.50, cache: { cType: 'oai-ac', read: 0.15 } },
     isPreview: true,
-    benchmark: { cbaElo: 1415 },
+    benchmark: { cbaElo: 1417 }, // kimi-k2-0711-preview
     // parameterSpecs: [{ paramId: 'llmVndMoonshotWebSearch' }],
   },
   {
diff --git a/src/modules/llms/server/openai/models/openai.models.ts b/src/modules/llms/server/openai/models/openai.models.ts
index 01d04aec4..394f68946 100644
--- a/src/modules/llms/server/openai/models/openai.models.ts
+++ b/src/modules/llms/server/openai/models/openai.models.ts
@@ -90,7 +90,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
       { paramId: 'llmForceNoStream' },
     ],
     chatPrice: { input: 1.75, cache: { cType: 'oai-ac', read: 0.175 }, output: 14 },
-    // benchmark: TBD
+    benchmark: { cbaElo: 1440 }, // gpt-5.2-high
   },
   {
     idPrefix: 'gpt-5.2',
@@ -174,7 +174,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
       { paramId: 'llmForceNoStream' },
     ],
     chatPrice: { input: 1.25, cache: { cType: 'oai-ac', read: 0.125 }, output: 10 },
-    // benchmark: TBD
+    benchmark: { cbaElo: 1459 }, // gpt-5.1-high
   },
   {
     idPrefix: 'gpt-5.1',
@@ -267,7 +267,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
       { paramId: 'llmForceNoStream' }, // non-streaming option for unverified organizations
     ],
     chatPrice: { input: 1.25, cache: { cType: 'oai-ac', read: 0.125 }, output: 10 },
-    benchmark: { cbaElo: 1442 }, // gpt-5-high
+    benchmark: { cbaElo: 1435 }, // gpt-5-high
   },
   {
     idPrefix: 'gpt-5',
@@ -302,7 +302,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 16384,
     interfaces: [LLM_IF_OAI_Responses, LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_PromptCaching], // no function calling or reasoning
     chatPrice: { input: 1.25, cache: { cType: 'oai-ac', read: 0.125 }, output: 10 },
-    benchmark: { cbaElo: 1430 }, // gpt-5-chat
+    benchmark: { cbaElo: 1426 }, // gpt-5-chat
   },
 
   // GPT-5 Codex
@@ -351,7 +351,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_Tools_WebSearch, LLM_IF_HOTFIX_NoTemperature],
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort4' }, { paramId: 'llmVndOaiWebSearchContext' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }, { paramId: 'llmForceNoStream' }],
     chatPrice: { input: 0.25, cache: { cType: 'oai-ac', read: 0.025 }, output: 2 },
-    benchmark: { cbaElo: 1388 }, // gpt-5-mini-high
+    benchmark: { cbaElo: 1390 }, // gpt-5-mini-high
   },
   {
     idPrefix: 'gpt-5-mini',
@@ -369,7 +369,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: [LLM_IF_OAI_Responses, ...IFS_CHAT_CACHE_REASON, LLM_IF_HOTFIX_NoTemperature],
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort4' }, { paramId: 'llmVndOaiVerbosity' }, { paramId: 'llmVndOaiImageGeneration' }],
     chatPrice: { input: 0.05, cache: { cType: 'oai-ac', read: 0.005 }, output: 0.4 },
-    benchmark: { cbaElo: 1344 }, // gpt-5-nano-high
+    benchmark: { cbaElo: 1338 }, // gpt-5-nano-high
   },
   {
     idPrefix: 'gpt-5-nano',
@@ -390,6 +390,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
       { paramId: 'llmForceNoStream' },
     ],
     // chatPrice: TBD - unknown pricing
+    benchmark: { cbaElo: 1354 }, // gpt-oss-120b
   },
 
 
@@ -455,7 +456,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: IFS_CHAT_CACHE_REASON,
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.275 }, output: 4.4 },
-    benchmark: { cbaElo: 1393 }, // o4-mini-2025-04-16
+    benchmark: { cbaElo: 1391 }, // o4-mini-2025-04-16
   },
   {
     idPrefix: 'o4-mini',
@@ -508,7 +509,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: IFS_CHAT_CACHE_REASON,
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }, { paramId: 'llmForceNoStream' }],
     chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 },
-    benchmark: { cbaElo: 1444 }, // o3-2025-04-16
+    benchmark: { cbaElo: 1433 }, // o3-2025-04-16
   },
   {
     idPrefix: 'o3',
@@ -526,7 +527,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_PromptCaching, LLM_IF_OAI_Reasoning, LLM_IF_HOTFIX_StripImages],
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }],
     chatPrice: { input: 1.1, cache: { cType: 'oai-ac', read: 0.55 }, output: 4.4 },
-    benchmark: { cbaElo: 1347 }, // o3-mini (not using o3-mini-high here, as it seems too inflated)
+    benchmark: { cbaElo: 1348 }, // o3-mini
   },
   {
     idPrefix: 'o3-mini',
@@ -563,7 +564,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     interfaces: IFS_CHAT_CACHE_REASON,
     parameterSpecs: [{ paramId: 'llmVndOaiReasoningEffort' }, { paramId: 'llmVndOaiRestoreMarkdown' }],
     chatPrice: { input: 15, cache: { cType: 'oai-ac', read: 7.5 }, output: 60 },
-    benchmark: { cbaElo: 1399 }, // o1-2024-12-17
+    benchmark: { cbaElo: 1402 }, // o1-2024-12-17
   },
   {
     idPrefix: 'o1',
@@ -583,7 +584,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 32768,
     interfaces: IFS_CHAT_CACHE,
     chatPrice: { input: 2, cache: { cType: 'oai-ac', read: 0.5 }, output: 8 },
-    benchmark: { cbaElo: 1409 }, // gpt-4.1-2025-04-14
+    benchmark: { cbaElo: 1413 }, // gpt-4.1-2025-04-14
   },
   {
     idPrefix: 'gpt-4.1',
@@ -600,7 +601,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 32768,
     interfaces: IFS_CHAT_CACHE,
     chatPrice: { input: 0.4, cache: { cType: 'oai-ac', read: 0.1 }, output: 1.6 },
-    benchmark: { cbaElo: 1377 }, // gpt-4.1-mini-2025-04-14
+    benchmark: { cbaElo: 1382 }, // gpt-4.1-mini-2025-04-14
   },
   {
     idPrefix: 'gpt-4.1-mini',
@@ -617,7 +618,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 32768,
     interfaces: IFS_CHAT_CACHE,
     chatPrice: { input: 0.1, cache: { cType: 'oai-ac', read: 0.025 }, output: 0.4 },
-    benchmark: { cbaElo: 1320 }, // gpt-4.1-nano-2025-04-14
+    benchmark: { cbaElo: 1322 }, // gpt-4.1-nano-2025-04-14
   },
   {
     idPrefix: 'gpt-4.1-nano',
@@ -694,7 +695,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 16384,
     interfaces: IFS_CHAT_CACHE,
     chatPrice: { input: 2.5, cache: { cType: 'oai-ac', read: 1.25 }, output: 10 },
-    benchmark: { cbaElo: 1333 }, // GPT-4o (08/06)
+    benchmark: { cbaElo: 1335 }, // gpt-4o-2024-08-06
   },
   {
     idPrefix: 'gpt-4o-2024-05-13',
@@ -705,7 +706,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: IFS_CHAT_MIN,
     chatPrice: { input: 5, output: 15 },
-    benchmark: { cbaElo: 1344 }, // gpt-4o-2024-05-13
+    benchmark: { cbaElo: 1346 }, // gpt-4o-2024-05-13
   },
   {
     idPrefix: 'gpt-4o',
@@ -720,7 +721,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 16384,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Json], // does not support Tools
     chatPrice: { input: 5, output: 15 },
-    benchmark: { cbaElo: 1441 }, // chatgpt-4o-latest-20250326
+    benchmark: { cbaElo: 1443 }, // chatgpt-4o-latest-20250326
     isLegacy: true, // Deprecated February 17, 2026.
   },
 
@@ -784,7 +785,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 16384,
     interfaces: IFS_CHAT_CACHE,
     chatPrice: { input: 0.15, cache: { cType: 'oai-ac', read: 0.075 }, output: 0.6 },
-    benchmark: { cbaElo: 1316 }, // GPT-4o-mini (07/18)
+    benchmark: { cbaElo: 1318 }, // gpt-4o-mini-2024-07-18
   },
   {
     idPrefix: 'gpt-4o-mini',
@@ -838,7 +839,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: IFS_CHAT_MIN,
     chatPrice: { input: 10, output: 30 },
-    benchmark: { cbaElo: 1324 }, // gpt-4-turbo-2024-04-09
+    benchmark: { cbaElo: 1325 }, // gpt-4-turbo-2024-04-09
   },
   {
     idPrefix: 'gpt-4-turbo',
@@ -854,7 +855,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
     chatPrice: { input: 10, output: 30 },
-    benchmark: { cbaElo: 1315 }, // gpt-4-0125-preview
+    benchmark: { cbaElo: 1314 }, // gpt-4-0125-preview
   },
   {
     idPrefix: 'gpt-4-1106-preview', // GPT-4 Turbo preview model
@@ -865,7 +866,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json],
     chatPrice: { input: 10, output: 30 },
-    benchmark: { cbaElo: 1315 }, // gpt-4-1106-preview
+    benchmark: { cbaElo: 1314 }, // gpt-4-1106-preview
   },
   {
     idPrefix: 'gpt-4-turbo-preview',
@@ -883,7 +884,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 8192,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
     chatPrice: { input: 30, output: 60 },
-    benchmark: { cbaElo: 1163 },
+    benchmark: { cbaElo: 1276 }, // gpt-4-0613
     isLegacy: true,
   },
   {
@@ -894,7 +895,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     contextWindow: 8192,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
     chatPrice: { input: 30, output: 60 },
-    benchmark: { cbaElo: 1186 },
+    benchmark: { cbaElo: 1288 }, // gpt-4-0314
     isLegacy: true,
   },
   {
@@ -917,7 +918,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
     chatPrice: { input: 0.5, output: 1.5 },
-    benchmark: { cbaElo: 1106 },
+    benchmark: { cbaElo: 1225 }, // gpt-3.5-turbo-0125
   },
   {
     idPrefix: 'gpt-3.5-turbo-1106',
@@ -928,7 +929,7 @@ export const _knownOpenAIChatModels: ManualMappings = [
     maxCompletionTokens: 4096,
     interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Fn],
     chatPrice: { input: 1, output: 2 },
-    benchmark: { cbaElo: 1068 },
+    benchmark: { cbaElo: 1204 }, // gpt-3.5-turbo-1106
   },
   {
     idPrefix: 'gpt-3.5-turbo',
diff --git a/src/modules/llms/server/openai/models/xai.models.ts b/src/modules/llms/server/openai/models/xai.models.ts
index 1f9459e75..988390bb6 100644
--- a/src/modules/llms/server/openai/models/xai.models.ts
+++ b/src/modules/llms/server/openai/models/xai.models.ts
@@ -83,7 +83,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: [...XAI_IF_Vision, LLM_IF_OAI_Reasoning],
     parameterSpecs: XAI_PAR_Reasoning,
     chatPrice: PRICE_41,
-    benchmark: { cbaElo: 1483 }, // grok-4-1-fast-reasoning
+    benchmark: { cbaElo: 1430 }, // grok-4-1-fast-reasoning
   },
   {
     idPrefix: 'grok-4-1-fast-non-reasoning',
@@ -94,7 +94,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: XAI_IF_Vision,
     parameterSpecs: XAI_PAR,
     chatPrice: PRICE_41,
-    benchmark: { cbaElo: 1465 }, // grok-4-1-fast-non-reasoning
+    benchmark: { cbaElo: 1466 }, // grok-4.1
   },
 
   // Grok 4
@@ -108,7 +108,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: [...XAI_IF_Vision, LLM_IF_OAI_Reasoning],
     parameterSpecs: XAI_PAR_Reasoning,
     chatPrice: PRICE_40,
-    benchmark: { cbaElo: 1420 },
+    benchmark: { cbaElo: 1404 }, // grok-4-fast-reasoning
   },
   {
     hidden: true, // yield to 4.1
@@ -120,7 +120,6 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: XAI_IF_Vision,
     parameterSpecs: XAI_PAR,
     chatPrice: PRICE_40,
-    benchmark: { cbaElo: 1409 },
   },
   {
     idPrefix: 'grok-4-0709',
@@ -131,7 +130,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: [...XAI_IF_Vision, LLM_IF_OAI_Reasoning],
     parameterSpecs: XAI_PAR_Reasoning,
     chatPrice: { input: 3, output: 15, cache: { cType: 'oai-ac', read: 0.75 } },
-    benchmark: { cbaElo: 1415 + 6 }, // grok-4-0709 (+6 to stay on top of the fast)
+    benchmark: { cbaElo: 1410 }, // grok-4-0709
   },
 
   // Grok 3 (Pre-Grok 4: no server-side tools)
@@ -144,7 +143,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: XAI_IF_Pre4,
     parameterSpecs: XAI_PAR_Pre4,
     chatPrice: { input: 3, output: 15, cache: { cType: 'oai-ac', read: 0.75 } },
-    benchmark: { cbaElo: 1409 }, // grok-3-preview-02-24
+    benchmark: { cbaElo: 1411 }, // grok-3-preview-02-24
   },
   {
     idPrefix: 'grok-3-mini',
@@ -155,7 +154,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: [...XAI_IF_Pre4, LLM_IF_OAI_Reasoning],
     parameterSpecs: XAI_PAR_Pre4,
     chatPrice: { input: 0.3, output: 0.5, cache: { cType: 'oai-ac', read: 0.075 } },
-    benchmark: { cbaElo: 1358 }, // grok-3-mini-beta (updated from CSV)
+    benchmark: { cbaElo: 1357 }, // grok-3-mini-beta
   },
 
   // Grok Code (Pre-Grok 4: no server-side tools)
@@ -181,8 +180,7 @@ const _knownXAIChatModels: ManualMappings = [
     interfaces: XAI_IF_Pre4_Vision,
     parameterSpecs: XAI_PAR_Pre4,
     chatPrice: { input: 2, output: 10 },
-    // Fuzzy matched with "grok-2-2024-08-13" (1288) => wrong, but still we need a fallback
-    benchmark: { cbaElo: 1288 },
+    // no benchmark on purpose: the previous 1288 was a fuzzy match with "grok-2-2024-08-13" (a different model)
   },
 
 ] as const;