Sweep: more succinct output

This commit is contained in:
Enrico Ros
2026-02-04 19:12:50 -08:00
parent d37a603db2
commit 51d58223b4
5 changed files with 253 additions and 247 deletions
@@ -1,6 +1,7 @@
{
"_comment": "API-validated parameter values. null=undefined/missing. Values are tested and working. Note: temperature is continuous, not discrete.",
"_evaluated": "Evaluated: gemini-thinking-budget, gemini-thinking-level, temperature. If missing, the parameter is not supported by that model.",
"_modelFilter": "models/gemini-3, models/gemini-2, models/gemma",
"models/gemini-2.0-flash-001": {
"gemini-thinking-budget": [0],
"temperature-range": [0,2]
@@ -32,7 +33,7 @@
"temperature-range": [0,2]
},
"models/gemini-2.5-flash-preview-tts": {
"temperature": [0,0.5,1.5,2]
"temperature-range": [0,2]
},
"models/gemini-2.5-pro": {
"gemini-thinking-budget": [1024,16384,24576,32768],
@@ -1,107 +1,98 @@
{
"_comment": "API-validated parameter values. null=undefined/missing. Values are tested and working. Note: temperature is continuous, not discrete.",
"_evaluated": "Evaluated: oai-image-generation, oai-reasoning-effort, oai-verbosity, oai-web-search, temperature. If missing, the parameter is not supported by that model.",
"_modelFilter": "gpt-5, o",
"gpt-5-2025-08-07": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["minimal","low","medium","high"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5-chat-latest": {
"oai-image-generation": ["hq"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5-codex": {
"oai-reasoning-effort": ["low","medium","high"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-web-search"]
},
"gpt-5-mini-2025-08-07": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["minimal","low","medium","high"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5-nano-2025-08-07": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["minimal","low","medium","high"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5-pro-2025-10-06": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["high"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5-search-api-2025-10-14": {
"temperature-range": [0,2]
},
"gpt-5.1-2025-11-13": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["none","low","medium","high"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5.1-chat-latest": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["medium"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5.1-codex": {
"oai-reasoning-effort": ["low","medium","high"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-web-search"]
},
"gpt-5.1-codex-max": {
"oai-reasoning-effort": ["low","medium","high","xhigh"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-web-search"]
},
"gpt-5.1-codex-mini": {
"oai-reasoning-effort": ["low","medium","high"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-web-search"]
},
"gpt-5.2-2025-12-11": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["none","low","medium","high","xhigh"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5.2-chat-latest": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["medium"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-image-generation","oai-web-search"]
},
"gpt-5.2-codex": {
"oai-reasoning-effort": ["low", "medium","high","xhigh"],
"oai-reasoning-effort": ["low","medium","high","xhigh"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-web-search"]
},
"gpt-5.2-pro-2025-12-11": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["medium","high","xhigh"],
"oai-verbosity": ["low","medium","high"],
"oai-web-search": ["medium"],
"temperature": [1]
"temperature": [1],
"tools": ["oai-image-generation","oai-web-search"]
},
"o1-2024-12-17": {
"oai-reasoning-effort": ["low","medium","high","xhigh"],
@@ -114,41 +105,39 @@
"temperature-range": [0,2]
},
"o3-2025-04-16": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["low","medium","high","xhigh"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"o3-deep-research-2025-06-26": {
"oai-reasoning-effort": ["medium"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-web-search"]
},
"o3-mini-2025-01-31": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["low","medium","high","xhigh"],
"oai-verbosity": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation"]
},
"o3-pro-2025-06-10": {
"oai-image-generation": ["hq"],
"oai-reasoning-effort": ["low","medium","high"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-image-generation","oai-web-search"]
},
"o4-mini-2025-04-16": {
"oai-reasoning-effort": ["low","medium","high","xhigh"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-web-search"]
},
"o4-mini-deep-research-2025-06-26": {
"oai-reasoning-effort": ["medium"],
"oai-verbosity": ["medium"],
"oai-web-search": ["medium"],
"temperature-range": [0,2]
"temperature-range": [0,2],
"tools": ["oai-web-search"]
}
}
@@ -2,7 +2,7 @@
"_comment": "API-validated parameter values. null=undefined/missing. Values are tested and working. Note: temperature is continuous, not discrete.",
"_evaluated": "Evaluated: temperature, xai-reasoning-effort, xai-web-search. If missing, the parameter is not supported by that model.",
"grok-2-vision-1212": {
"temperature-range": [0,1.5],
"temperature-range": [0,2],
"xai-reasoning-effort": ["low","medium","high"]
},
"grok-3": {
@@ -16,27 +16,27 @@
"grok-4-0709": {
"temperature-range": [0,2],
"xai-reasoning-effort": ["low","medium","high"],
"xai-web-search": ["auto"]
"xai-tools": ["xai-web-search"]
},
"grok-4-1-fast-non-reasoning": {
"temperature-range": [0,2],
"xai-reasoning-effort": ["low","medium","high"],
"xai-web-search": ["auto"]
"xai-tools": ["xai-web-search"]
},
"grok-4-1-fast-reasoning": {
"temperature-range": [0,2],
"temperature-range": [0,1.5],
"xai-reasoning-effort": ["low","medium","high"],
"xai-web-search": ["auto"]
"xai-tools": ["xai-web-search"]
},
"grok-4-fast-non-reasoning": {
"temperature-range": [0,2],
"xai-reasoning-effort": ["low","medium","high"],
"xai-web-search": ["auto"]
"xai-tools": ["xai-web-search"]
},
"grok-4-fast-reasoning": {
"temperature-range": [0,2],
"temperature-range": [0,1.5],
"xai-reasoning-effort": ["low","medium","high"],
"xai-web-search": ["auto"]
"xai-tools": ["xai-web-search"]
},
"grok-code-fast-1": {
"temperature-range": [0,1],
@@ -21,7 +21,6 @@
},
"xai": {
"access": { "dialect": "xai", "oaiKey": "xai-...", "oaiOrg": "", "oaiHost": "", "heliKey": "" },
"modelFilter": "grok",
"sweeps": ["temperature", "xai-reasoning-effort"],
"baseModelOverrides": { "maxTokens": 512 }
},
+199 -182
View File
@@ -23,172 +23,7 @@ import { createChatGenerateDispatch } from '~/modules/aix/server/dispatch/chatGe
import { fetchResponseOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.router.fetchers';
// ============================================================================
// Terminal Colors
// ============================================================================
const COLORS = {
reset: '\x1b[0m',
bright: '\x1b[1m',
dim: '\x1b[2m',
red: '\x1b[31m',
green: '\x1b[32m',
yellow: '\x1b[33m',
cyan: '\x1b[36m',
magenta: '\x1b[35m',
} as const;
// ============================================================================
// Types
// ============================================================================
interface CliOptions {
config?: string;
dialect?: string;
key?: string;
host?: string;
modelFilter?: string;
sweepFilter?: string;
delay: number;
maxModels: number;
verbose: boolean;
debug: boolean;
includeSymlinks: boolean;
dryRun: boolean;
sequential: boolean;
}
type SweepValue = string | number | boolean | null;
interface SweepDefinition<TValue> {
name: string;
description: string;
applicability:
| { type: 'all' }
| { type: 'dialects'; dialects: AixAPI_Access['dialect'][] };
values: TValue[];
applyToModel: (value: TValue) => Partial<AixAPI_Model>;
mode: 'enumerate' | 'bisect';
/** For bisect mode: precision to stop binary search */
bisectPrecision?: number;
}
function defineSweep<const TValue>(definition: SweepDefinition<TValue>) {
return definition;
}
interface VendorSweepConfig {
access: AixAPI_Access;
sweeps?: string[]; // names of built-in sweeps from SWEEP_DEFINITIONS
modelFilter?: string | string[]; // prefix(es) to match model IDs
baseModelOverrides?: Partial<AixAPI_Model>;
}
interface SweepConfig {
delayMs?: number;
maxTokens?: number;
vendors: Record<string, VendorSweepConfig>;
}
// Results file format: dialect -> model -> sweep -> passing values
type SweepResultsFile = Record<string, DialectResults>;
type DialectResults = Record<string, ModelResults>;
type ModelResults = Record<string, SweepValue[]>;
type ErrorCategory =
| 'exception' // exception testing the parameter
| 'dialect' // parsing fails
| 'abort' | 'connection' | 'http' | 'parse'; // tRPC errors
type TestOutcome = 'pass' | 'fail' | 'truncated' | 'error';
interface TestResult {
sweepName: string;
paramValue: SweepValue;
outcome: TestOutcome;
errorMessage: string | null; // source of truth for non-pass outcomes (always set when outcome !== 'pass')
errorCategory?: ErrorCategory; // secondary, used for symbol display
httpStatus?: number;
responseText?: string;
verboseLogs: string[]; // --verbose: response/error details
debugRequestAixModel?: string; // --debug: AixAPI_Model JSON
debugRequestBody?: string; // --debug: request body JSON
durationMs: number;
}
interface ModelSweepResult {
modelId: string;
modelLabel: string;
results: TestResult[];
}
interface VendorSweepResult {
vendorName: string;
dialect: AixAPI_Access['dialect'];
modelsAvailable: number;
modelsTested: number;
models: ModelSweepResult[];
}
// ============================================================================
// SweepCollectorTransmitter - Lightweight IParticleTransmitter for probing
// ============================================================================
class SweepCollectorTransmitter implements IParticleTransmitter {
text: string = '';
dialectIssue: string | null = null;
tokenStopReason: AixWire_Particles.GCTokenStopReason | null = null;
endReason: string | null = null;
get hasText(): boolean { return this.text.length > 0; }
get hasError(): boolean { return this.dialectIssue !== null; }
// Parser-initiated Control
setEnded(reason: 'done-dialect' | 'issue-dialect'): void {
this.endReason = reason;
}
setDialectTerminatingIssue(dialectText: string, _symbol: string | null, _serverLog: ParticleServerLogLevel): void {
this.dialectIssue = dialectText;
}
// Parts data - only collect text, everything else is a no-op
endMessagePart(): void { /* no-op */ }
appendText(textChunk: string): void {
this.text += textChunk;
}
appendReasoningText(_textChunk: string, _options?: { weak?: 'tag'; restart?: boolean }): void { /* no-op */ }
setReasoningSignature(_signature: string): void { /* no-op */ }
addReasoningRedactedData(_data: string): void { /* no-op */ }
appendAutoText_weak(textChunk: string): void { this.text += textChunk; }
appendAudioInline(_mimeType: string, _base64Data: string, _label: string, _generator: string, _durationMs: number): void { /* no-op */ }
appendImageInline(_mimeType: string, _base64Data: string, _label: string, _generator: string, _prompt: string): void { /* no-op */ }
startFunctionCallInvocation(_id: string | null, _functionName: string, _expectedArgsFmt: 'incr_str' | 'json_object', _args: string | object | null): void { /* no-op */ }
appendFunctionCallInvocationArgs(_id: string | null, _argsJsonChunk: string): void { /* no-op */ }
addCodeExecutionInvocation(_id: string | null, _language: string, _code: string, _author: 'gemini_auto_inline' | 'code_interpreter'): void { /* no-op */ }
addCodeExecutionResponse(_id: string | null, _error: boolean | string, _result: string, _executor: 'gemini_auto_inline' | 'code_interpreter', _environment: 'upstream'): void { /* no-op */ }
appendUrlCitation(_title: string, _url: string, _citationNumber?: number, _startIndex?: number, _endIndex?: number, _textSnippet?: string, _pubTs?: number): void { /* no-op */ }
// Special
sendControl(_cgCOp: AixWire_Particles.ChatControlOp, _flushQueue?: boolean): void { /* no-op */ }
sendVoidPlaceholder(_mot: 'search-web' | 'gen-image' | 'code-exec', _text: string): void { /* no-op */ }
sendSetVendorState(_vendor: string, _state: unknown): void { /* no-op */ }
// Non-parts data
setModelName(_modelName: string): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: 'oai-responses'): void { /* no-op */ }
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void { this.tokenStopReason = reason; }
updateMetrics(_update: Partial<AixWire_Particles.CGSelectMetrics>): void { /* no-op */ }
}
// ============================================================================
// Built-in Sweep Definitions
// ============================================================================
// --- SWEEP DEFINITIONS ---
const SWEEP_DEFINITIONS = [
@@ -324,6 +159,165 @@ const SWEEP_DEFINITIONS = [
] as const satisfies SweepDefinition<any>[];
interface SweepDefinition<TValue> {
name: string;
description: string;
applicability:
| { type: 'all' }
| { type: 'dialects'; dialects: AixAPI_Access['dialect'][] };
values: TValue[];
applyToModel: (value: TValue) => Partial<AixAPI_Model>;
mode: 'enumerate' | 'bisect';
/** For bisect mode: precision to stop binary search */
bisectPrecision?: number;
}
type SweepValue = string | number | boolean | null;
function defineSweep<const TValue>(definition: SweepDefinition<TValue>) {
return definition;
}
// ============================================================================
// Types
// ============================================================================
const COLORS = {
reset: '\x1b[0m',
bright: '\x1b[1m',
dim: '\x1b[2m',
red: '\x1b[31m',
green: '\x1b[32m',
yellow: '\x1b[33m',
cyan: '\x1b[36m',
magenta: '\x1b[35m',
} as const;
interface CliOptions {
config?: string;
dialect?: string;
key?: string;
host?: string;
modelFilter?: string;
sweepFilter?: string;
delay: number;
maxModels: number;
verbose: boolean;
debug: boolean;
includeSymlinks: boolean;
dryRun: boolean;
sequential: boolean;
}
// Types: Config File
interface SweepConfigFile {
delayMs?: number;
maxTokens?: number;
vendors: Record<string, SweepConfigFile_Vendor>;
}
interface SweepConfigFile_Vendor {
access: AixAPI_Access;
sweeps?: string[]; // names of built-in sweeps from SWEEP_DEFINITIONS
modelFilter?: string | string[]; // prefix(es) to match model IDs
baseModelOverrides?: Partial<AixAPI_Model>;
}
// Types: Sweep Results
interface VendorSweepResult {
vendorName: string;
dialect: AixAPI_Access['dialect'];
modelsAvailable: number;
modelsTested: number;
models: ModelSweepResult[];
modelFilter?: string; // effective filter used (from config and/or CLI)
}
interface ModelSweepResult {
modelId: string;
modelLabel: string;
results: TestResult[];
}
interface TestResult {
sweepName: string;
paramValue: SweepValue;
outcome: TestOutcome;
errorMessage: string | null; // source of truth for non-pass outcomes (always set when outcome !== 'pass')
errorCategory?: ErrorCategory; // secondary, used for symbol display
httpStatus?: number;
responseText?: string;
verboseLogs: string[]; // --verbose: response/error details
debugRequestAixModel?: string; // --debug: AixAPI_Model JSON
debugRequestBody?: string; // --debug: request body JSON
durationMs: number;
}
type TestOutcome = 'pass' | 'fail' | 'truncated' | 'error';
type ErrorCategory =
| 'exception' // exception testing the parameter
| 'dialect' // parsing fails
| 'abort' | 'connection' | 'http' | 'parse'; // tRPC errors
// ============================================================================
// SweepCollectorTransmitter - Lightweight IParticleTransmitter for probing
// ============================================================================
class SweepCollectorTransmitter implements IParticleTransmitter {
text: string = '';
dialectIssue: string | null = null;
tokenStopReason: AixWire_Particles.GCTokenStopReason | null = null;
endReason: string | null = null;
get hasText(): boolean { return this.text.length > 0; }
get hasError(): boolean { return this.dialectIssue !== null; }
// Parser-initiated Control
setEnded(reason: 'done-dialect' | 'issue-dialect'): void {
this.endReason = reason;
}
setDialectTerminatingIssue(dialectText: string, _symbol: string | null, _serverLog: ParticleServerLogLevel): void {
this.dialectIssue = dialectText;
}
// Parts data - only collect text, everything else is a no-op
endMessagePart(): void { /* no-op */ }
appendText(textChunk: string): void {
this.text += textChunk;
}
appendReasoningText(_textChunk: string, _options?: { weak?: 'tag'; restart?: boolean }): void { /* no-op */ }
setReasoningSignature(_signature: string): void { /* no-op */ }
addReasoningRedactedData(_data: string): void { /* no-op */ }
appendAutoText_weak(textChunk: string): void { this.text += textChunk; }
appendAudioInline(_mimeType: string, _base64Data: string, _label: string, _generator: string, _durationMs: number): void { /* no-op */ }
appendImageInline(_mimeType: string, _base64Data: string, _label: string, _generator: string, _prompt: string): void { /* no-op */ }
startFunctionCallInvocation(_id: string | null, _functionName: string, _expectedArgsFmt: 'incr_str' | 'json_object', _args: string | object | null): void { /* no-op */ }
appendFunctionCallInvocationArgs(_id: string | null, _argsJsonChunk: string): void { /* no-op */ }
addCodeExecutionInvocation(_id: string | null, _language: string, _code: string, _author: 'gemini_auto_inline' | 'code_interpreter'): void { /* no-op */ }
addCodeExecutionResponse(_id: string | null, _error: boolean | string, _result: string, _executor: 'gemini_auto_inline' | 'code_interpreter', _environment: 'upstream'): void { /* no-op */ }
appendUrlCitation(_title: string, _url: string, _citationNumber?: number, _startIndex?: number, _endIndex?: number, _textSnippet?: string, _pubTs?: number): void { /* no-op */ }
// Special
sendControl(_cgCOp: AixWire_Particles.ChatControlOp, _flushQueue?: boolean): void { /* no-op */ }
sendVoidPlaceholder(_mot: 'search-web' | 'gen-image' | 'code-exec', _text: string): void { /* no-op */ }
sendSetVendorState(_vendor: string, _state: unknown): void { /* no-op */ }
// Non-parts data
setModelName(_modelName: string): void { /* no-op */ }
setUpstreamHandle(_handle: string, _type: 'oai-responses'): void { /* no-op */ }
setTokenStopReason(reason: AixWire_Particles.GCTokenStopReason): void { this.tokenStopReason = reason; }
updateMetrics(_update: Partial<AixWire_Particles.CGSelectMetrics>): void { /* no-op */ }
}
// ============================================================================
// Minimal Request Construction
// ============================================================================
@@ -383,7 +377,6 @@ async function testParameterValue(
value: SweepValue,
maxTokens: number,
baseModelOverrides: Partial<AixAPI_Model> | undefined,
debug: boolean = false,
): Promise<TestResult> {
const startTime = Date.now();
const baseModel = createBaseModel(modelId, maxTokens);
@@ -656,15 +649,19 @@ function printSweepSummary(results: VendorSweepResult[]): void {
// Results File (per-dialect: llm-{dialect}-parameters-sweep.json)
// ============================================================================
// Results file format: dialect -> model -> sweep -> passing values
type DialectReultsByModel = Record<string, ModelResultsBySweep>;
type ModelResultsBySweep = Record<string, SweepValue[]>;
function getResultsFilePath(dialect: string): string {
const scriptDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1'));
return path.join(scriptDir, `llm-${dialect}-parameters-sweep.json`);
}
function saveDialectResults(dialect: string, dialectResults: DialectResults, evaluatedSweeps: string[]): void {
function saveDialectResults(dialect: string, dialectResults: DialectReultsByModel, evaluatedSweeps: string[], modelFilter?: string): void {
const filePath = getResultsFilePath(dialect);
// Sort keys for stable output
const sorted: DialectResults = {};
const sorted: DialectReultsByModel = {};
for (const model of Object.keys(dialectResults).sort()) {
sorted[model] = {};
for (const sweep of Object.keys(dialectResults[model]).sort()) {
@@ -686,16 +683,18 @@ function saveDialectResults(dialect: string, dialectResults: DialectResults, eva
// Insert header comments after opening brace
const comment1 = '"_comment": "API-validated parameter values. null=undefined/missing. Values are tested and working. Note: temperature is continuous, not discrete.",';
const comment2 = `"_evaluated": "Evaluated: ${evaluatedSweeps.sort().join(', ')}. If missing, the parameter is not supported by that model.",`;
const json = jsonBody.replace(/^\{\n {2}/, '{\n ' + comment1 + '\n ' + comment2 + '\n ');
const comment3 = modelFilter ? `"_modelFilter": "${modelFilter}",` : '';
const comments = [comment1, comment2, comment3].filter(Boolean).join('\n ');
const json = jsonBody.replace(/^\{\n {2}/, '{\n ' + comments + '\n ');
fs.writeFileSync(filePath, json + '\n', 'utf-8');
console.log(`${COLORS.dim}Results saved to: ${filePath}${COLORS.reset}`);
}
function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectResults {
const dialectResults: DialectResults = {};
function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectReultsByModel {
const dialectResults: DialectReultsByModel = {};
for (const model of vendorResult.models) {
const modelResults: ModelResults = {};
const modelResults: ModelResultsBySweep = {};
// Group results by sweep name
const bySweep = new Map<string, TestResult[]>();
@@ -705,8 +704,10 @@ function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectR
}
// Sweeps that become "tools" when fully supported
const toolSweeps = ['oai-image-generation', 'oai-web-search', 'xai-web-search'];
const toolSweeps = ['oai-image-generation', 'oai-web-search'];
const tools: string[] = [];
const xaiToolSweeps = ['xai-web-search'];
const xaiTools: string[] = [];
// Extract passing values for each sweep (skip if none passed)
for (const [sweepName, sweepResults] of bySweep) {
@@ -721,6 +722,10 @@ function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectR
tools.push(sweepName);
continue;
}
if (xaiToolSweeps.includes(sweepName) && passingValues.length === sweepResults.length) {
xaiTools.push(sweepName);
continue;
}
// Special case: temperature with contiguous range from 0 -> use range [min, max]
if (sweepName === 'temperature') {
@@ -741,6 +746,8 @@ function vendorResultToDialectResults(vendorResult: VendorSweepResult): DialectR
// Add tools array if non-empty
if (tools.length > 0)
modelResults['tools'] = tools.sort();
if (xaiTools.length > 0)
modelResults['xai-tools'] = xaiTools.sort();
dialectResults[model.modelId] = modelResults;
}
@@ -759,7 +766,7 @@ function saveAllResults(allResults: VendorSweepResult[]): void {
}
}
const dialectResults = vendorResultToDialectResults(vendorResult);
saveDialectResults(vendorResult.dialect, dialectResults, [...evaluatedSweeps]);
saveDialectResults(vendorResult.dialect, dialectResults, [...evaluatedSweeps], vendorResult.modelFilter);
}
}
@@ -768,7 +775,7 @@ function saveAllResults(allResults: VendorSweepResult[]): void {
// Config Loading
// ============================================================================
function loadSweepConfig(configPath: string): SweepConfig {
function loadSweepConfig(configPath: string): SweepConfigFile {
const fullPath = path.resolve(configPath);
if (!fs.existsSync(fullPath))
throw new Error(`Configuration file not found: ${fullPath}`);
@@ -779,11 +786,11 @@ function loadSweepConfig(configPath: string): SweepConfig {
// Support both old format (flat Record<string, AixAPI_Access>) and new format (SweepConfig)
if (parsed.vendors) {
return parsed as SweepConfig;
return parsed as SweepConfigFile;
}
// Legacy: flat Record<string, AixAPI_Access>
const vendors: Record<string, VendorSweepConfig> = {};
const vendors: Record<string, SweepConfigFile_Vendor> = {};
for (const [key, value] of Object.entries(parsed)) {
if (key.startsWith('_')) continue;
if (typeof value === 'object' && value !== null && 'dialect' in value)
@@ -795,7 +802,7 @@ function loadSweepConfig(configPath: string): SweepConfig {
}
}
function createSingleVendorConfig(dialect: string, key: string, host?: string): SweepConfig {
function createSingleVendorConfig(dialect: string, key: string, host?: string): SweepConfigFile {
let access: AixAPI_Access;
switch (dialect) {
@@ -1025,7 +1032,7 @@ function sleep(ms: number): Promise<void> {
// ============================================================================
async function runSweep(
sweepConfig: SweepConfig,
sweepConfig: SweepConfigFile,
options: CliOptions,
): Promise<VendorSweepResult[]> {
const allResults: VendorSweepResult[] = [];
@@ -1067,7 +1074,7 @@ async function runSweep(
// 2b. Filter out duplicate models with idVariant (keep base model, or first variant if no base)
{
const beforeCount = models.length;
// const beforeCount = models.length;
// Sort so base models (no idVariant) come before variants
models.sort((a, b) => (a.idVariant ? 1 : 0) - (b.idVariant ? 1 : 0));
const seenIds = new Set<string>();
@@ -1136,12 +1143,22 @@ async function runSweep(
}
console.log(` Applicable sweeps: ${applicableSweeps.map(s => COLORS.magenta + s.name + COLORS.reset).join(', ')}`);
// Build effective model filter string for JSON output
const effectiveFilters: string[] = [];
if (vendorModelFilter) {
const prefixes = Array.isArray(vendorModelFilter) ? vendorModelFilter : [vendorModelFilter];
effectiveFilters.push(...prefixes);
}
if (options.modelFilter)
effectiveFilters.push(options.modelFilter);
const vendorResult: VendorSweepResult = {
vendorName,
dialect: access.dialect,
modelsAvailable: models.length,
modelsTested: models.length,
models: [],
modelFilter: effectiveFilters.length > 0 ? effectiveFilters.join(', ') : undefined,
};
// 5. For each model
@@ -1257,7 +1274,7 @@ async function main(): Promise<void> {
const options = parseArgs();
// Load vendor config
let sweepConfig: SweepConfig;
let sweepConfig: SweepConfigFile;
if (options.config) {
sweepConfig = loadSweepConfig(options.config);
} else {