diff --git a/tools/develop/llm-parameter-sweep/llm-anthropic-parameters-sweep.json b/tools/develop/llm-parameter-sweep/llm-anthropic-parameters-sweep.json
index 8140bc4e9..801fa414a 100644
--- a/tools/develop/llm-parameter-sweep/llm-anthropic-parameters-sweep.json
+++ b/tools/develop/llm-parameter-sweep/llm-anthropic-parameters-sweep.json
@@ -1,13 +1,6 @@
 {
   "_comment": "API-validated parameter values. null=undefined/missing. Values are tested and working. Note: temperature is continuous, not discrete.",
   "_evaluated": "Evaluated: ant-effort, ant-thinking-budget, temperature. If missing, the parameter is not supported by that model.",
-  "claude-3-5-haiku-20241022": {
-    "temperature-range": [0,1]
-  },
-  "claude-3-7-sonnet-20250219": {
-    "ant-thinking-budget": [1024,8192,16384,32768,65535],
-    "temperature-range": [0,1]
-  },
   "claude-3-haiku-20240307": {
     "temperature-range": [0,1]
   },
diff --git a/tools/develop/llm-parameter-sweep/llm-gemini-parameters-sweep.json b/tools/develop/llm-parameter-sweep/llm-gemini-parameters-sweep.json
index 4f0993e38..0304a30e7 100644
--- a/tools/develop/llm-parameter-sweep/llm-gemini-parameters-sweep.json
+++ b/tools/develop/llm-parameter-sweep/llm-gemini-parameters-sweep.json
@@ -52,6 +52,26 @@
     "gemini-thinking-level": ["low","high"],
     "temperature-range": [0,2]
   },
+  "models/gemini-3.1-flash-image-preview": {
+    "gemini-thinking-budget": [0,1024,16384,24576,32768,65535],
+    "gemini-thinking-level": ["minimal","high"],
+    "temperature-range": [0,2]
+  },
+  "models/gemini-3.1-flash-lite-preview": {
+    "gemini-thinking-budget": [0,1024,16384,24576,32768,65535],
+    "gemini-thinking-level": ["minimal","low","medium","high"],
+    "temperature-range": [0,2]
+  },
+  "models/gemini-3.1-pro-preview": {
+    "gemini-thinking-budget": [1024,16384,24576,32768,65535],
+    "gemini-thinking-level": ["low","medium","high"],
+    "temperature-range": [0,2]
+  },
+  "models/gemini-3.1-pro-preview-customtools": {
+    "gemini-thinking-budget": [1024,16384,24576,32768,65535],
+    "gemini-thinking-level": ["low","medium","high"],
+    "temperature-range": [0,2]
+  },
   "models/gemma-3-12b-it": {
     "temperature-range": [0,2]
   },
diff --git a/tools/develop/llm-parameter-sweep/llm-openai-parameters-sweep.json b/tools/develop/llm-parameter-sweep/llm-openai-parameters-sweep.json
index 93e99ba6f..050b684d1 100644
--- a/tools/develop/llm-parameter-sweep/llm-openai-parameters-sweep.json
+++ b/tools/develop/llm-parameter-sweep/llm-openai-parameters-sweep.json
@@ -47,7 +47,7 @@
   "gpt-5.1-2025-11-13": {
     "oai-reasoning-effort": ["none","low","medium","high"],
     "oai-temperature-think-high": [1],
-    "oai-temperature-think-none": [0,0.5,1,1.5,2],
+    "oai-temperature-think-none-range": [0,2],
     "oai-verbosity": ["low","medium","high"],
     "temperature-range": [0,2],
     "tools": ["oai-image-generation","oai-web-search"]
@@ -77,7 +77,7 @@
   "gpt-5.2-2025-12-11": {
     "oai-reasoning-effort": ["none","low","medium","high","xhigh"],
     "oai-temperature-think-high": [1],
-    "oai-temperature-think-none": [0,0.5,1,1.5,2],
+    "oai-temperature-think-none-range": [0,2],
     "oai-verbosity": ["low","medium","high"],
     "temperature-range": [0,2],
     "tools": ["oai-image-generation","oai-web-search"]
@@ -99,6 +99,18 @@
     "temperature": [1],
     "tools": ["oai-image-generation","oai-web-search"]
   },
+  "gpt-5.3-chat-latest": {
+    "temperature": [1],
+    "tools": ["oai-image-generation","oai-web-search"]
+  },
+  "gpt-5.3-codex": {
+    "oai-reasoning-effort": ["none","low","medium","high","xhigh"],
+    "oai-temperature-think-high": [1],
+    "oai-temperature-think-none-range": [0,2],
+    "oai-verbosity": ["low","medium","high"],
+    "temperature-range": [0,2],
+    "tools": ["oai-image-generation","oai-web-search"]
+  },
   "o1-2024-12-17": {
     "oai-reasoning-effort": ["low","medium","high","xhigh"],
     "oai-temperature-think-high": [0,0.5,1,1.5,2],
diff --git a/tools/develop/llm-parameter-sweep/sweep.ts b/tools/develop/llm-parameter-sweep/sweep.ts
index 61f81cb6d..dc78f9e63 100644
--- a/tools/develop/llm-parameter-sweep/sweep.ts
+++ b/tools/develop/llm-parameter-sweep/sweep.ts
@@ -832,14 +832,14 @@ function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectR
   }
 
   // Special case: temperature with contiguous range from 0 -> use range [min, max]
-  if (sweepName === 'temperature') {
+  if (sweepName === 'temperature' || sweepName === 'oai-temperature-think-high' || sweepName === 'oai-temperature-think-none') {
     const numericPassing = passingValues.filter((v): v is number => typeof v === 'number').sort((a, b) => a - b);
     const numericTested = sweepResults.map(r => r.paramValue).filter((v): v is number => typeof v === 'number').sort((a, b) => a - b);
 
     // Check if passing values form a contiguous prefix of tested values (no gaps)
     const isContiguousFromStart = numericPassing.length >= 2 && numericPassing.every((v, i) => v === numericTested[i]);
     if (isContiguousFromStart) {
-      modelResults['temperature-range'] = [numericPassing[0], numericPassing[numericPassing.length - 1]];
+      modelResults[`${sweepName}-range`] = [numericPassing[0], numericPassing[numericPassing.length - 1]];
       continue;
     }
   }