Llms: fix Streaming timeouts (2)

2026-05-10 21:50:14 -07:00 · 2024-04-23 02:07:20 -07:00
parent 2f8e879976
commit cbda1d7cd0
2 changed files with 12 additions and 11 deletions
@@ -217,7 +217,8 @@ function createEventStreamTransformer(muxingFormat: MuxingFormat, vendorTextPars

      // Send initial packet indicating the start of the stream
      const startPacket: ChatStreamingPreambleStartSchema = { type: 'start' };
-      controller.enqueue(textEncoder.encode(JSON.stringify(startPacket)));
+      const preambleStart = JSON.stringify(startPacket) + '\n';
+      controller.enqueue(textEncoder.encode(preambleStart));

      // only used for debugging
      let debugLastMs: number | null = null;
@@ -306,8 +307,8 @@ function createStreamParserAnthropicMessages(): AIStreamParser {
        responseMessage = anthropicWireMessagesResponseSchema.parse(message);
        // hack: prepend the model name to the first packet
        if (firstMessage) {
-          const firstPacket: ChatStreamingPreambleModelSchema = { model: responseMessage.model };
-          text = JSON.stringify(firstPacket);
+          const preambleModel: ChatStreamingPreambleModelSchema = { model: responseMessage.model };
+          text = JSON.stringify(preambleModel) + '\n';
        }
        break;

@@ -421,8 +422,8 @@ function createStreamParserGemini(modelName: string): AIStreamParser {
    // hack: prepend the model name to the first packet
    if (!hasBegun) {
      hasBegun = true;
-      const firstPacket: ChatStreamingPreambleModelSchema = { model: modelName };
-      text = JSON.stringify(firstPacket) + text;
+      const preambleModel: ChatStreamingPreambleModelSchema = { model: modelName };
+      text = JSON.stringify(preambleModel) + '\n' + text;
    }

    return { text, close: false };
@@ -457,8 +458,8 @@ function createStreamParserOllama(): AIStreamParser {
    // hack: prepend the model name to the first packet
    if (!hasBegun && chunk.model) {
      hasBegun = true;
-      const firstPacket: ChatStreamingPreambleModelSchema = { model: chunk.model };
-      text = JSON.stringify(firstPacket) + text;
+      const preambleModel: ChatStreamingPreambleModelSchema = { model: chunk.model };
+      text = JSON.stringify(preambleModel) + '\n' + text;
    }

    return { text, close: chunk.done };
@@ -498,8 +499,8 @@ function createStreamParserOpenAI(): AIStreamParser {
    // hack: prepend the model name to the first packet
    if (!hasBegun) {
      hasBegun = true;
-      const firstPacket: ChatStreamingPreambleModelSchema = { model: json.model };
-      text = JSON.stringify(firstPacket) + text;
+      const preambleModel: ChatStreamingPreambleModelSchema = { model: json.model };
+      text = JSON.stringify(preambleModel) + '\n' + text;
    }

    // [LocalAI] workaround: LocalAI doesn't send the [DONE] event, but similarly to OpenAI, it sends a "finish_reason" delta update
@@ -94,10 +94,10 @@ export async function unifiedStreamingClient<TSourceSetup = unknown, TLLMOptions
    while ((!parsedPreambleStart || !parsedPreableModel) && incrementalText.startsWith('{')) {

      // extract a complete JSON object, if present
-      const endOfJson = incrementalText.indexOf('}');
+      const endOfJson = incrementalText.indexOf('}\n');
      if (endOfJson === -1) break;
      const jsonString = incrementalText.substring(0, endOfJson + 1);
-      incrementalText = incrementalText.substring(endOfJson + 1);
+      incrementalText = incrementalText.substring(endOfJson + 2);

      // first packet: preamble to let the Vercel edge function go over time
      if (!parsedPreambleStart) {