AIX: OpenAI: parse annotations/citations

2026-05-10 21:50:14 -07:00 · 2025-03-11 21:40:25 -07:00
parent 84f989d6da
commit 983e964e36
2 changed files with 89 additions and 22 deletions
@@ -35,6 +35,7 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct
  let hasBegun = false;
  let hasWarned = false;
  let timeToFirstEvent: number | undefined;
+  let progressiveCitationNumber = 1;
  // NOTE: could compute rate (tok/s) from the first textful event to the last (to ignore the prefill time)

  // Supporting structure to accumulate the assistant message
@@ -195,6 +196,22 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct

      } // .choices.tool_calls[]

+      // [OpenAI, 2025-03-11] delta: Annotations[].url_citation
+      if (delta.annotations !== undefined) {
+
+        if (Array.isArray(delta.annotations)) {
+          for (const { type: annotationType, url_citation: urlCitation } of delta.annotations) {
+            if (annotationType !== 'url_citation')
+              throw new Error(`unexpected annotation type: ${annotationType}`);
+            pt.appendUrlCitation(urlCitation.title, urlCitation.url, undefined, urlCitation.start_index, urlCitation.end_index, undefined);
+          }
+        } else {
+          // non-array annotations: we log and continue instead of aborting, so the user still gets the response
+          console.log('AIX: OpenAI-dispatch: unexpected annotations:', delta.annotations);
+        }
+
+      }
+
      // Token Stop Reason - usually missing in all but the last chunk, but we don't rely on it
      if (finish_reason) {
        const tokenStopReason = _fromOpenAIFinishReason(finish_reason);
@@ -216,6 +233,7 @@ export function createOpenAIChatCompletionsChunkParser(): ChatGenerateParseFunct

 export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction {
  const parserCreationTimestamp = Date.now();
+  let progressiveCitationNumber = 1;

  return function(pt: IParticleTransmitter, eventData: string) {

@@ -285,6 +303,22 @@ export function createOpenAIChatCompletionsParserNS(): ChatGenerateParseFunction
      if (tokenStopReason !== null)
        pt.setTokenStopReason(tokenStopReason);

+      // [OpenAI, 2025-03-11] message: Annotations[].url_citation
+      if (message.annotations !== undefined) {
+
+        if (Array.isArray(message.annotations)) {
+          for (const { type: annotationType, url_citation: urlCitation } of message.annotations) {
+            if (annotationType !== 'url_citation')
+              throw new Error(`unexpected annotation type: ${annotationType}`);
+            pt.appendUrlCitation(urlCitation.title, urlCitation.url, undefined, urlCitation.start_index, urlCitation.end_index, undefined);
+          }
+        } else {
+          // non-array annotations (this includes null): we log and continue, instead of aborting
+          console.log('AIX: OpenAI-dispatch-NS unexpected annotations:', message.annotations);
+        }
+
+      }
+
    } // .choices[]

  };
@@ -89,6 +89,18 @@ export namespace OpenAIWire_ContentParts {
    PredictedFunctionCall_schema,
  ]);

+  /// Annotations - Output only - these may not even be content parts
+
+  export const OpenAI_AnnotationObject_schema = z.object({
+    type: z.literal('url_citation'),
+    url_citation: z.object({
+      start_index: z.number().optional(),
+      end_index: z.number().optional(),
+      title: z.string(),
+      url: z.string(),
+    }),
+  });
+
 }

 export namespace OpenAIWire_Messages {
@@ -116,7 +128,7 @@ export namespace OpenAIWire_Messages {
    // name: _optionalParticipantName,
  });

-  const AssistantMessage_schema = z.object({
+  export const AssistantMessage_schema = z.object({
    role: z.literal('assistant'),
    /**
     * The contents of the assistant message. Required unless tool_calls or function_call is specified.
@@ -144,27 +156,9 @@ export namespace OpenAIWire_Messages {
    audio: z.object({
      id: z.string(),
    }).nullable().optional(),
-    // name: _optionalParticipantName,
-  });

-  export const AssistantMessage_NS_schema = AssistantMessage_schema.extend({
-    //
-    // IMPORTANT - this message *extends* the AssistantMessage_schema, to inherit all fields while performing any other change
-    //
-
-    // .optional: when parsing a non-streaming message with just a FC, the content can be missing
-    content: z.string().nullable().optional(),
-
-    /**
-     * [OpenAI, 2024-10-17] Audio output (non-streaming only)
-     * If the audio output modality is requested, this object contains data about the audio response from the model
-     */
-    audio: z.object({
-      id: z.string(),
-      data: z.string(), // Base64 encoded audio data
-      expires_at: z.number(), // Unix timestamp
-      transcript: z.string().optional(),
-    }).nullable().optional(),
+    // function_call: // ignored, as it's deprecated
+    // name: _optionalParticipantName, // omitted by choice: generally unsupported
  });

  const ToolMessage_schema = z.object({
@@ -411,12 +405,46 @@ export namespace OpenAIWire_API_Chat_Completions {
    prompt_cache_miss_tokens: z.number().optional(),
  }).nullable();

+  /**
+   * NOTE: this is effectively the OUTPUT message (from the Chat Completion output object).
+   * - 2025-03-11: the docs show that 'role' is not mandated to be 'assistant' anymore and could be different
+   */
+  const ChoiceMessage_NS_schema = OpenAIWire_Messages.AssistantMessage_schema.extend({
+    //
+    // IMPORTANT - this schema *extends* AssistantMessage_schema: it inherits all fields, and only the fields listed below are overridden or added
+    //
+
+    // could be relaxed to z.string() instead of the 'assistant' literal -- but we keep it strict for now, for parser correctness
+    // role: z.string(),
+
+    // .optional: when parsing a non-streaming message with just a FC, the content can be missing
+    content: z.string().nullable().optional(),
+
+    /**
+     * [OpenAI, 2025-03-11] Annotations
+     * This is a full assistant message, which is parsed by the non-streaming parser.
+     */
+    annotations: z.array(OpenAIWire_ContentParts.OpenAI_AnnotationObject_schema).nullable().optional(),
+
+    /**
+     * [OpenAI, 2024-10-17] Audio output (non-streaming only)
+     * If the audio output modality is requested, this object contains data about the audio response from the model
+     */
+    audio: z.object({
+      id: z.string(),
+      data: z.string(), // Base64 encoded audio data
+      expires_at: z.number(), // Unix timestamp
+      transcript: z.string().optional(),
+    }).nullable().optional(),
+
+  });
+
  const Choice_NS_schema = z.object({
    index: z.number(),

    // NOTE: the OpenAI api does not force role: 'assistant', it's only induced
    // We recycle the assistant message response here, with either content or tool_calls
-    message: OpenAIWire_Messages.AssistantMessage_NS_schema,
+    message: ChoiceMessage_NS_schema,

    finish_reason: z.union([FinishReason_Enum, z.string()])
      .nullable(),
@@ -507,6 +535,11 @@ export namespace OpenAIWire_API_Chat_Completions {
    tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
      .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
    refusal: z.string().nullable().optional(), // [OpenAI, 2024-10-01] refusal message
+    /**
+     * [OpenAI, 2025-03-11] Annotations
+     * Not yet documented in the API guide; this schema should be refined once the format is defined
+     */
+    annotations: z.array(OpenAIWire_ContentParts.OpenAI_AnnotationObject_schema).optional(),
  });

  const ChunkChoice_schema = z.object({