AIX: Anthropic: show refusal details, if present, as inline text

This commit is contained in:
Enrico Ros
2026-04-24 15:12:06 -07:00
parent aa2c4f06b7
commit b21b8cc982
2 changed files with 48 additions and 0 deletions
@@ -401,6 +401,10 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
if (delta.container)
_emitContainerState(pt, delta.container);
// -> Refusal details (structured) - surface category/explanation when stop_reason === 'refusal'
if (delta.stop_reason === 'refusal' && delta.stop_details)
_emitRefusalDetails(pt, delta.stop_details);
// -> Token Stop Reason
const tokenStopReason = _fromAnthropicStopReason(delta.stop_reason, 'message_delta');
if (tokenStopReason !== null)
@@ -511,6 +515,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
content,
container,
stop_reason,
stop_details,
usage,
} = AnthropicWire_API_Message_Create.Response_schema.parse(JSON.parse(fullData));
@@ -650,6 +655,10 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
_createAnthropicPauseTurnContinuation(content, container?.id),
);
// -> Refusal details (structured) - surface category/explanation when stop_reason === 'refusal'
if (stop_reason === 'refusal' && stop_details)
_emitRefusalDetails(pt, stop_details);
// -> Token Stop Reason (pause_turn already thrown above)
const tokenStopReason = _fromAnthropicStopReason(stop_reason, 'parser_NS');
if (tokenStopReason !== null)
@@ -681,6 +690,20 @@ function _emitContainerState(pt: IParticleTransmitter, container: { id: string;
});
}
/**
 * Appends Anthropic's structured refusal details (sent with stop_reason === 'refusal') to the output as inline text.
 *
 * The optional `category` is rendered in square brackets, followed by the optional `explanation`;
 * the two are separated by a single space. When neither carries content, nothing is emitted.
 *
 * Only text is appended here: terminationReason is deliberately left untouched (message_stop is expected
 * to set it to 'done-dialect'), which avoids a spurious override warning.
 *
 * @param pt transmitter that receives the appended text
 * @param stopDetails the `stop_details` payload that accompanies a refusal
 */
function _emitRefusalDetails(pt: IParticleTransmitter, stopDetails: { type: 'refusal'; category?: 'cyber' | 'bio' | null; explanation?: string | null }): void {
  const { category, explanation } = stopDetails;

  // keep only the pieces that actually carry content, in "[category] explanation" order
  const summary = [category ? `[${category}]` : null, explanation || null]
    .filter(Boolean)
    .join(' ');

  // nothing to tell the user - stay silent
  if (!summary) return;

  pt.appendText(`\n\n${IssueSymbols.PromptBlocked} **Refusal:** ${summary}`);
}
// --- Shared server tool result handlers (used by both S and NS parsers) ---
@@ -13,6 +13,10 @@ const hotFixAntShipNoEmptyTextBlocks = true; // Replace empty text blocks with a
*
* ## Updates
*
* ### 2026-04-24 - API Sync: stop_details for structured refusals
* - Response: added `stop_details` ({ type: 'refusal', category: 'cyber'|'bio'|null, explanation: string|null })
* - event_MessageDelta.delta: added `stop_details` (arrives alongside stop_reason in streaming)
*
* ### 2026-03-21 - API Sync: GA tool versions, thinking display, caller updates, cache_control
* - Tools: Added web_search_20260209 (GA), web_fetch_20260209/20260309 (GA), code_execution_20260120 (GA REPL)
* - Request: Added top-level `cache_control` for automatic caching (Feb 2026)
@@ -825,6 +829,17 @@ export namespace AnthropicWire_API_Message_Create {
'model_context_window_exceeded',
]);
/**
 * Wire shape of `stop_details`, the structured companion of `stop_reason`.
 * Currently only populated when stop_reason === 'refusal'.
 * - type: always the literal 'refusal'
 * - category: 'cyber' | 'bio' when the refusal maps to a named policy category; null or absent otherwise
 * - explanation: human-readable explanation (NOT guaranteed stable); null or absent when unavailable
 */
const StopDetails_schema = z.object({
  type: z.literal('refusal'),
  // nullable + optional: the field may be missing entirely or explicitly null
  category: z.enum(['cyber', 'bio']).nullable().optional(),
  explanation: z.string().nullable().optional(),
});
/// Request
export type Request = z.infer<typeof Request_schema>;
@@ -1030,6 +1045,12 @@ export namespace AnthropicWire_API_Message_Create {
// Which custom stop sequence was generated, if any.
stop_sequence: z.string().nullable(),
/**
* Structured stop details. Present when stop_reason === 'refusal' (carries category + explanation).
* In streaming, stop_details is null at message_start and appears on message_delta alongside stop_reason.
*/
stop_details: StopDetails_schema.nullish(),
/**
* Billing and rate-limit usage.
* Token counts represent the underlying cost to Anthropic's systems.
@@ -1088,6 +1109,10 @@ export namespace AnthropicWire_API_Message_Create {
delta: z.object({
stop_reason: StopReason_schema.nullable(),
stop_sequence: z.string().nullable(),
/**
* Structured stop details - present alongside stop_reason === 'refusal' (category + explanation).
*/
stop_details: StopDetails_schema.nullish(),
/**
* Container state updates - present when Skills/code_execution tools are used.
* Provides container id/expiry that may differ from message_start if the container was created mid-stream.