From 9537ce59e8a26bc346b41f2b744fcf429f4db77b Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Mon, 20 Apr 2026 13:18:55 -0700
Subject: [PATCH] LLM: cap initial max response to 128k

---
 src/modules/llms/llm.client.ts | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/modules/llms/llm.client.ts b/src/modules/llms/llm.client.ts
index 943a0d274..da71e4be1 100644
--- a/src/modules/llms/llm.client.ts
+++ b/src/modules/llms/llm.client.ts
@@ -18,6 +18,12 @@ import { findServiceAccessOrThrow } from './vendors/vendor.helpers';
  */
 export const LLMS_VARIANT_SEPARATOR = '::' as const;
 
+// Cap for the *initial* llmResponseTokens default to avoid runaway defaults on huge-context models.
+// The model's maxOutputTokens is unchanged (vendor-reported cap remains true); users can still raise
+// llmResponseTokens via the slider up to maxOutputTokens. On reset, this capped initial is used.
+const _INITIAL_RESPONSE_TOKENS_CAP = 128_000;
+
+
 function _clientIdWithVariant(id: string, idVariant?: string): string {
   return !idVariant ? id
     : idVariant.startsWith(LLMS_VARIANT_SEPARATOR) ? `${id}${idVariant}`
@@ -86,8 +92,9 @@ function _createDLLMFromModelDescription(d: ModelDescriptionSchema, service: DMo
   const contextTokens = d.contextWindow || null;
   const maxOutputTokens = d.maxCompletionTokens || (contextTokens ? Math.round(contextTokens / 2) : null); // fallback to half context window
 
-  // initial (user overridable) response tokens setting: equal to the max, if the max is given, or to 1/8th of the context window (when max is set to 1/2 of context)
-  const llmResponseTokens = !maxOutputTokens ? null : !d.maxCompletionTokens ? Math.round(maxOutputTokens / 4) : d.maxCompletionTokens;
+  // initial (user overridable) response tokens setting: the vendor max when given (a missing or 0 value counts as not given, same '||' test as maxOutputTokens above), else 1/8th of the context window (1/4 of the half-context fallback); clamped to _INITIAL_RESPONSE_TOKENS_CAP
+  const llmResponseTokens = !maxOutputTokens ? null
+    : Math.min(d.maxCompletionTokens || Math.round(maxOutputTokens / 4), _INITIAL_RESPONSE_TOKENS_CAP);
 
   // DLLM is a fundamental type in our application