diff --git a/src/modules/aix/server/dispatch/chatGenerate/parsers/anthropic.parser.ts b/src/modules/aix/server/dispatch/chatGenerate/parsers/anthropic.parser.ts index b596fe3bd..e06d3d55e 100644 --- a/src/modules/aix/server/dispatch/chatGenerate/parsers/anthropic.parser.ts +++ b/src/modules/aix/server/dispatch/chatGenerate/parsers/anthropic.parser.ts @@ -401,6 +401,10 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction { if (delta.container) _emitContainerState(pt, delta.container); + // -> Refusal details (structured) - surface category/explanation when stop_reason === 'refusal' + if (delta.stop_reason === 'refusal' && delta.stop_details) + _emitRefusalDetails(pt, delta.stop_details); + // -> Token Stop Reason const tokenStopReason = _fromAnthropicStopReason(delta.stop_reason, 'message_delta'); if (tokenStopReason !== null) @@ -511,6 +515,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction { content, container, stop_reason, + stop_details, usage, } = AnthropicWire_API_Message_Create.Response_schema.parse(JSON.parse(fullData)); @@ -650,6 +655,10 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction { _createAnthropicPauseTurnContinuation(content, container?.id), ); + // -> Refusal details (structured) - surface category/explanation when stop_reason === 'refusal' + if (stop_reason === 'refusal' && stop_details) + _emitRefusalDetails(pt, stop_details); + // -> Token Stop Reason (pause_turn already thrown above) const tokenStopReason = _fromAnthropicStopReason(stop_reason, 'parser_NS'); if (tokenStopReason !== null) @@ -681,6 +690,20 @@ function _emitContainerState(pt: IParticleTransmitter, container: { id: string; }); } +/** + * Surface structured refusal details (stop_reason === 'refusal') as inline text; emits nothing when both category and explanation are empty or missing. 
+ * Anthropic's streaming classifiers can intervene mid-generation; appending the category + explanation + * as text lets the user see WHY the model refused without touching terminationReason (which message_stop + * will set to 'done-dialect') - avoids a spurious override warning. + */ +function _emitRefusalDetails(pt: IParticleTransmitter, stopDetails: { type: 'refusal'; category?: 'cyber' | 'bio' | null; explanation?: string | null }): void { + const parts: string[] = []; + if (stopDetails.category) parts.push(`[${stopDetails.category}]`); + if (stopDetails.explanation) parts.push(stopDetails.explanation); + if (!parts.length) return; + pt.appendText(`\n\n${IssueSymbols.PromptBlocked} **Refusal:** ${parts.join(' ')}`); +} + // --- Shared server tool result handlers (used by both S and NS parsers) --- diff --git a/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts index 8262b0e77..1f363a5aa 100644 --- a/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts +++ b/src/modules/aix/server/dispatch/wiretypes/anthropic.wiretypes.ts @@ -13,6 +13,10 @@ const hotFixAntShipNoEmptyTextBlocks = true; // Replace empty text blocks with a * * ## Updates * + * ### 2026-04-24 - API Sync: stop_details for structured refusals + * - Response: added `stop_details` ({ type: 'refusal', category: 'cyber'|'bio'|null, explanation: string|null }) + * - event_MessageDelta.delta: added `stop_details` (arrives alongside stop_reason in streaming) + * * ### 2026-03-21 - API Sync: GA tool versions, thinking display, caller updates, cache_control * - Tools: Added web_search_20260209 (GA), web_fetch_20260209/20260309 (GA), code_execution_20260120 (GA REPL) * - Request: Added top-level `cache_control` for automatic caching (Feb 2026) @@ -825,6 +829,17 @@ export namespace AnthropicWire_API_Message_Create { 'model_context_window_exceeded', ]); + /** + * Structured stop details, paired with stop_reason. 
Currently only populated when stop_reason === 'refusal'. + * - category: 'cyber' | 'bio' when the refusal maps to a named policy category, null otherwise + * - explanation: human-readable explanation (NOT guaranteed stable), null when unavailable + */ + const StopDetails_schema = z.object({ + type: z.literal('refusal'), + category: z.enum(['cyber', 'bio']).nullish(), + explanation: z.string().nullish(), + }); + /// Request export type Request = z.infer; @@ -1030,6 +1045,12 @@ export namespace AnthropicWire_API_Message_Create { // Which custom stop sequence was generated, if any. stop_sequence: z.string().nullable(), + /** + * Structured stop details. Present when stop_reason === 'refusal' (carries category + explanation); the schema also accepts null or an omitted field. + * In streaming, stop_details is null at message_start and appears on message_delta alongside stop_reason. + */ + stop_details: StopDetails_schema.nullish(), + /** * Billing and rate-limit usage. * Token counts represent the underlying cost to Anthropic's systems. @@ -1088,6 +1109,10 @@ export namespace AnthropicWire_API_Message_Create { delta: z.object({ stop_reason: StopReason_schema.nullable(), stop_sequence: z.string().nullable(), + /** + * Structured stop details - present alongside stop_reason === 'refusal' (category + explanation); may be null or omitted. + */ + stop_details: StopDetails_schema.nullish(), /** * Container state updates - present when Skills/code_execution tools are used. * Provides container id/expiry that may differ from message_start if the container was created mid-stream.