replaces eventstream lib to (hopefully) fix interrupted AWS streams
This commit is contained in:
Generated
+24
-65
@@ -11,9 +11,11 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||
"@aws-crypto/sha256-js": "^5.1.0",
|
||||
"@smithy/eventstream-codec": "^2.1.1",
|
||||
"@smithy/protocol-http": "^3.0.6",
|
||||
"@smithy/signature-v4": "^2.0.10",
|
||||
"@smithy/types": "^2.3.4",
|
||||
"@smithy/util-utf8": "^2.1.1",
|
||||
"axios": "^1.3.5",
|
||||
"check-disk-space": "^3.4.0",
|
||||
"cookie-parser": "^1.4.6",
|
||||
@@ -27,7 +29,6 @@
|
||||
"firebase-admin": "^11.10.1",
|
||||
"googleapis": "^122.0.0",
|
||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||
"lifion-aws-event-stream": "^1.0.7",
|
||||
"memorystore": "^1.6.7",
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-schedule": "^2.1.1",
|
||||
@@ -837,20 +838,20 @@
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/@smithy/eventstream-codec": {
|
||||
"version": "2.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-2.0.10.tgz",
|
||||
"integrity": "sha512-3SSDgX2nIsFwif6m+I4+ar4KDcZX463Noes8ekBgQHitULiWvaDZX8XqPaRQSQ4bl1vbeVXHklJfv66MnVO+lw==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-2.1.1.tgz",
|
||||
"integrity": "sha512-E8KYBxBIuU4c+zrpR22VsVrOPoEDzk35bQR3E+xm4k6Pa6JqzkDOdMyf9Atac5GPNKHJBdVaQ4JtjdWX2rl/nw==",
|
||||
"dependencies": {
|
||||
"@aws-crypto/crc32": "3.0.0",
|
||||
"@smithy/types": "^2.3.4",
|
||||
"@smithy/util-hex-encoding": "^2.0.0",
|
||||
"@smithy/types": "^2.9.1",
|
||||
"@smithy/util-hex-encoding": "^2.1.1",
|
||||
"tslib": "^2.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@smithy/is-array-buffer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.0.0.tgz",
|
||||
"integrity": "sha512-z3PjFjMyZNI98JFRJi/U0nGoLWMSJlDjAW4QUX2WNZLas5C0CmVV6LJ01JI0k90l7FvpmixjWxPFmENSClQ7ug==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.1.1.tgz",
|
||||
"integrity": "sha512-xozSQrcUinPpNPNPds4S7z/FakDTh1MZWtRP/2vQtYB/u3HYrX2UXuZs+VhaKBd6Vc7g2XPr2ZtwGBNDN6fNKQ==",
|
||||
"dependencies": {
|
||||
"tslib": "^2.5.0"
|
||||
},
|
||||
@@ -889,9 +890,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@smithy/types": {
|
||||
"version": "2.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/types/-/types-2.3.4.tgz",
|
||||
"integrity": "sha512-D7xlM9FOMFyFw7YnMXn9dK2KuN6+JhnrZwVt1fWaIu8hCk5CigysweeIT/H/nCo4YV+s8/oqUdLfexbkPZtvqw==",
|
||||
"version": "2.9.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/types/-/types-2.9.1.tgz",
|
||||
"integrity": "sha512-vjXlKNXyprDYDuJ7UW5iobdmyDm6g8dDG+BFUncAg/3XJaN45Gy5RWWWUVgrzIK7S4R1KWgIX5LeJcfvSI24bw==",
|
||||
"dependencies": {
|
||||
"tslib": "^2.5.0"
|
||||
},
|
||||
@@ -900,11 +901,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@smithy/util-buffer-from": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.0.0.tgz",
|
||||
"integrity": "sha512-/YNnLoHsR+4W4Vf2wL5lGv0ksg8Bmk3GEGxn2vEQt52AQaPSCuaO5PM5VM7lP1K9qHRKHwrPGktqVoAHKWHxzw==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.1.1.tgz",
|
||||
"integrity": "sha512-clhNjbyfqIv9Md2Mg6FffGVrJxw7bgK7s3Iax36xnfVj6cg0fUG7I4RH0XgXJF8bxi+saY5HR21g2UPKSxVCXg==",
|
||||
"dependencies": {
|
||||
"@smithy/is-array-buffer": "^2.0.0",
|
||||
"@smithy/is-array-buffer": "^2.1.1",
|
||||
"tslib": "^2.5.0"
|
||||
},
|
||||
"engines": {
|
||||
@@ -912,9 +913,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@smithy/util-hex-encoding": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-2.0.0.tgz",
|
||||
"integrity": "sha512-c5xY+NUnFqG6d7HFh1IFfrm3mGl29lC+vF+geHv4ToiuJCBmIfzx6IeHLg+OgRdPFKDXIw6pvi+p3CsscaMcMA==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-2.1.1.tgz",
|
||||
"integrity": "sha512-3UNdP2pkYUUBGEXzQI9ODTDK+Tcu1BlCyDBaRHwyxhA+8xLP8agEKQq4MGmpjqb4VQAjq9TwlCQX0kP6XDKYLg==",
|
||||
"dependencies": {
|
||||
"tslib": "^2.5.0"
|
||||
},
|
||||
@@ -946,11 +947,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@smithy/util-utf8": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.0.0.tgz",
|
||||
"integrity": "sha512-rctU1VkziY84n5OXe3bPNpKR001ZCME2JCaBBFgtiM2hfKbHFudc/BkMuPab8hRbLd0j3vbnBTTZ1igBf0wgiQ==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.1.1.tgz",
|
||||
"integrity": "sha512-BqTpzYEcUMDwAKr7/mVRUtHDhs6ZoXDi9NypMvMfOr/+u1NW7JgqodPDECiiLboEm6bobcPcECxzjtQh865e9A==",
|
||||
"dependencies": {
|
||||
"@smithy/util-buffer-from": "^2.0.0",
|
||||
"@smithy/util-buffer-from": "^2.1.1",
|
||||
"tslib": "^2.5.0"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2022,37 +2023,6 @@
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/crc": {
|
||||
"version": "3.8.0",
|
||||
"resolved": "https://registry.npmjs.org/crc/-/crc-3.8.0.tgz",
|
||||
"integrity": "sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ==",
|
||||
"dependencies": {
|
||||
"buffer": "^5.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/crc/node_modules/buffer": {
|
||||
"version": "5.7.1",
|
||||
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
|
||||
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/feross"
|
||||
},
|
||||
{
|
||||
"type": "patreon",
|
||||
"url": "https://www.patreon.com/feross"
|
||||
},
|
||||
{
|
||||
"type": "consulting",
|
||||
"url": "https://feross.org/support"
|
||||
}
|
||||
],
|
||||
"dependencies": {
|
||||
"base64-js": "^1.3.1",
|
||||
"ieee754": "^1.1.13"
|
||||
}
|
||||
},
|
||||
"node_modules/create-require": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz",
|
||||
@@ -3699,17 +3669,6 @@
|
||||
"graceful-fs": "^4.1.9"
|
||||
}
|
||||
},
|
||||
"node_modules/lifion-aws-event-stream": {
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmjs.org/lifion-aws-event-stream/-/lifion-aws-event-stream-1.0.7.tgz",
|
||||
"integrity": "sha512-qI0O85OrV5A9rBE++oIaWFjNngk/BqjnJ+3/wdtIPLfFWhPtf+xNuWd/T8lr/wnEpKm/8HbdgYf8pKozk0dPAw==",
|
||||
"dependencies": {
|
||||
"crc": "^3.8.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/limiter": {
|
||||
"version": "1.1.5",
|
||||
"resolved": "https://registry.npmjs.org/limiter/-/limiter-1.1.5.tgz",
|
||||
|
||||
+2
-1
@@ -19,9 +19,11 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||
"@aws-crypto/sha256-js": "^5.1.0",
|
||||
"@smithy/eventstream-codec": "^2.1.1",
|
||||
"@smithy/protocol-http": "^3.0.6",
|
||||
"@smithy/signature-v4": "^2.0.10",
|
||||
"@smithy/types": "^2.3.4",
|
||||
"@smithy/util-utf8": "^2.1.1",
|
||||
"axios": "^1.3.5",
|
||||
"check-disk-space": "^3.4.0",
|
||||
"cookie-parser": "^1.4.6",
|
||||
@@ -35,7 +37,6 @@
|
||||
"firebase-admin": "^11.10.1",
|
||||
"googleapis": "^122.0.0",
|
||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||
"lifion-aws-event-stream": "^1.0.7",
|
||||
"memorystore": "^1.6.7",
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-schedule": "^2.1.1",
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
import { Transform, TransformOptions } from "stream";
|
||||
import {
|
||||
EventStreamCodec,
|
||||
Message,
|
||||
MessageDecoderStream,
|
||||
} from "@smithy/eventstream-codec";
|
||||
import { fromUtf8, toUtf8 } from "@smithy/util-utf8";
|
||||
import { logger } from "../../../../logger";
|
||||
|
||||
const log = logger.child({ module: "aws-eventstream-decoder" });
|
||||
|
||||
/**
 * Consumes an HTTP response stream and transforms it into a decoded stream of
 * AWS vnd.amazon.eventstream messages.
 *
 * The AWS library uses async iterators, so this class acts as a bridge between
 * the async generator and the Node stream API for downstream consumers:
 *
 * - `_transform` places raw chunks on an internal queue.
 * - An async generator drains that queue and feeds `MessageDecoderStream`.
 * - A background loop pushes each decoded `Message` to the readable side.
 *
 * End of input is signalled by a `null` entry on the queue. The readable side
 * is only ended once the background loop has pushed every decoded message.
 */
export class AWSEventStreamDecoder extends Transform {
  private readonly decoder: EventStreamCodec;
  private messageStream: MessageDecoderStream | null = null;
  /** Chunks waiting to be decoded; a `null` entry marks end of input. */
  private queue: (Uint8Array | null)[] = [];
  /** Resolver of the generator's pending `dequeueChunk()` call, if any. */
  private resolveChunk: ((value: Uint8Array | null) => void) | null = null;
  /** Settles (never rejects) once the background decode loop has stopped. */
  private decodeLoop: Promise<void> = Promise.resolve();
  /** Error that stopped the decode loop, if it failed. */
  private decodeError: Error | null = null;

  /**
   * @param options Standard Transform options. `objectMode` is always forced
   *   to `true`, regardless of what is passed in.
   */
  constructor(options?: TransformOptions) {
    super({ ...options, objectMode: true });
    this.decoder = new EventStreamCodec(toUtf8, fromUtf8);
    this.setupStream();
  }

  /**
   * Hands a chunk to the generator: directly if it is currently waiting for
   * one, otherwise via the queue. Passing `null` signals end of input.
   */
  protected enqueueChunk(chunk: Uint8Array | null) {
    const resolve = this.resolveChunk;
    if (resolve) {
      // Clear before resolving so a re-entrant call cannot reuse the resolver.
      this.resolveChunk = null;
      resolve(chunk);
    } else {
      this.queue.push(chunk);
    }
  }

  /**
   * Resolves with the next queued chunk, or waits until one is enqueued.
   * Resolves with `null` once the end of input has been reached.
   */
  protected dequeueChunk(): Promise<Uint8Array | null> {
    const next = this.queue.shift();
    if (next !== undefined) {
      return Promise.resolve(next);
    }
    return new Promise((resolve) => (this.resolveChunk = resolve));
  }

  /** Wires the chunk queue into the decoder and starts the decode loop. */
  protected setupStream() {
    // MessageDecoderStream is an async iterator that consumes chunks from the
    // input generator and yields fully decoded individual messages.
    const messageStream = new MessageDecoderStream({
      decoder: this.decoder,
      inputStream: this.readChunks(),
    });
    this.messageStream = messageStream;

    // Start the generator and push messages downstream as they are decoded.
    this.decodeLoop = this.pumpMessages(messageStream);
  }

  /**
   * Wraps the response stream (via the chunk queue) in an async iterable that
   * can be consumed by the Amazon EventStream library.
   */
  private async *readChunks(): AsyncGenerator<Uint8Array, void, undefined> {
    while (true) {
      const chunk = await this.dequeueChunk();
      if (chunk === null) break;
      yield chunk;
    }
    log.debug("Input stream generator finished");
  }

  /**
   * Pushes every decoded message downstream. Never rejects: a decoding failure
   * is logged, recorded in `decodeError`, and destroys the stream with it.
   */
  private async pumpMessages(messages: MessageDecoderStream): Promise<void> {
    let lastMessage: Message | null = null;
    try {
      log.debug("Starting generator");
      for await (const message of messages) {
        lastMessage = message;
        this.push(message);
      }
      // No push(null) here: the readable side is ended by Transform itself
      // once the _flush callback is invoked.
    } catch (err) {
      log.error({ err, lastMessage }, "Error decoding eventstream message");
      this.decodeError = err instanceof Error ? err : new Error(String(err));
      this.destroy(this.decodeError);
    }
  }

  _transform(chunk: Buffer, _encoding: string, callback: () => void) {
    this.enqueueChunk(chunk);
    callback();
  }

  _flush(callback: (error?: Error | null) => void) {
    log.debug("Received end of stream; stopping generator");
    // Queue the end marker like any other chunk. Signalling only when the
    // generator happens to be waiting would lose the marker whenever it is
    // still busy with earlier chunks, leaving it blocked forever.
    this.enqueueChunk(null);
    // Completing the flush ends the readable side, so wait until the decode
    // loop has pushed everything; otherwise trailing messages would be pushed
    // after EOF.
    void this.decodeLoop.then(
      () => callback(this.decodeError),
      (err) => callback(err instanceof Error ? err : new Error(String(err)))
    );
  }
}
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import { Transform, TransformOptions } from "stream";
|
||||
|
||||
import { StringDecoder } from "string_decoder";
|
||||
// @ts-ignore
|
||||
import { Parser } from "lifion-aws-event-stream";
|
||||
import { logger } from "../../../../logger";
|
||||
import { RetryableError } from "../index";
|
||||
import { APIFormat } from "../../../../shared/key-management";
|
||||
import { Message } from "@smithy/eventstream-codec";
|
||||
import StreamArray from "stream-json/streamers/StreamArray";
|
||||
import { StringDecoder } from "string_decoder";
|
||||
import { logger } from "../../../../logger";
|
||||
import { APIFormat } from "../../../../shared/key-management";
|
||||
import { makeCompletionSSE } from "../../../../shared/streaming";
|
||||
import { RetryableError } from "../index";
|
||||
import { AWSEventStreamDecoder } from "./aws-eventstream-decoder";
|
||||
|
||||
const log = logger.child({ module: "sse-stream-adapter" });
|
||||
|
||||
@@ -15,25 +14,19 @@ type SSEStreamAdapterOptions = TransformOptions & {
|
||||
contentType?: string;
|
||||
api: APIFormat;
|
||||
};
|
||||
type AwsEventStreamMessage = {
|
||||
headers: {
|
||||
":message-type": "event" | "exception";
|
||||
":exception-type"?: string;
|
||||
};
|
||||
payload: { message?: string /** base64 encoded */; bytes?: string };
|
||||
};
|
||||
|
||||
/**
|
||||
* Receives either text chunks or AWS binary event stream chunks and emits
|
||||
* full SSE events.
|
||||
* Receives either text chunks or AWS vnd.amazon.eventstream messages and emits
|
||||
* full SSE-compliant messages.
|
||||
*/
|
||||
export class SSEStreamAdapter extends Transform {
|
||||
private readonly isAwsStream;
|
||||
private readonly isGoogleStream;
|
||||
private awsParser = new Parser();
|
||||
private awsDecoder = new AWSEventStreamDecoder();
|
||||
private jsonParser = StreamArray.withParser();
|
||||
private partialMessage = "";
|
||||
private decoder = new StringDecoder("utf8");
|
||||
private textDecoder = new TextDecoder("utf8");
|
||||
|
||||
constructor(options?: SSEStreamAdapterOptions) {
|
||||
super(options);
|
||||
@@ -41,10 +34,14 @@ export class SSEStreamAdapter extends Transform {
|
||||
options?.contentType === "application/vnd.amazon.eventstream";
|
||||
this.isGoogleStream = options?.api === "google-ai";
|
||||
|
||||
this.awsParser.on("data", (data: AwsEventStreamMessage) => {
|
||||
const message = this.processAwsEvent(data);
|
||||
if (message) {
|
||||
this.push(Buffer.from(message + "\n\n"), "utf8");
|
||||
this.awsDecoder.on("data", (data: Message) => {
|
||||
try {
|
||||
const message = this.processAwsEvent(data);
|
||||
if (message) {
|
||||
this.push(Buffer.from(message + "\n\n"), "utf8");
|
||||
}
|
||||
} catch (error) {
|
||||
this.emit("error", error);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -56,39 +53,52 @@ export class SSEStreamAdapter extends Transform {
|
||||
});
|
||||
}
|
||||
|
||||
protected processAwsEvent(event: AwsEventStreamMessage): string | null {
|
||||
const { payload, headers } = event;
|
||||
if (headers[":message-type"] === "exception" || !payload.bytes) {
|
||||
const eventStr = JSON.stringify(event);
|
||||
// Under high load, AWS can rugpull us by returning a 200 and starting the
|
||||
// stream but then immediately sending a rate limit error as the first
|
||||
// event. My guess is some race condition in their rate limiting check
|
||||
// that occurs if two requests arrive at the same time when only one
|
||||
// concurrency slot is available.
|
||||
if (headers[":exception-type"] === "throttlingException") {
|
||||
log.warn(
|
||||
{ event: eventStr },
|
||||
"AWS request throttled after streaming has already started; retrying"
|
||||
);
|
||||
throw new RetryableError("AWS request throttled mid-stream");
|
||||
} else {
|
||||
log.error({ event: eventStr }, "Received bad AWS stream event");
|
||||
return makeCompletionSSE({
|
||||
format: "anthropic",
|
||||
title: "Proxy stream error",
|
||||
message:
|
||||
"The proxy received malformed or unexpected data from AWS while streaming.",
|
||||
obj: event,
|
||||
reqId: "proxy-sse-adapter-message",
|
||||
model: "",
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const { bytes } = payload;
|
||||
return [
|
||||
"event: completion",
|
||||
`data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
|
||||
].join("\n");
|
||||
/**
 * Converts one decoded AWS eventstream message into an SSE message string.
 *
 * - JSON `chunk` events yield an `event: completion` SSE message whose data is
 *   the base64-decoded `bytes` field of the message body.
 * - Exceptions, errors and any other kind of event yield an error completion
 *   built by `makeCompletionSSE`.
 * - Messages with an unrecognized `:message-type` are logged and dropped.
 *
 * @param message Decoded eventstream message.
 * @returns The SSE text to forward, or `null` if nothing should be emitted.
 * @throws RetryableError if AWS reports throttling after streaming started.
 */
protected processAwsEvent(message: Message): string | null {
  // Per amazon, headers and body are always present. headers is an object,
  // body is a Uint8Array, potentially zero-length.
  const { headers, body } = message;
  const eventType = headers[":event-type"]?.value;
  const messageType = headers[":message-type"]?.value;
  const contentType = headers[":content-type"]?.value;
  const exceptionType = headers[":exception-type"]?.value;
  const errorCode = headers[":error-code"]?.value;

  const isKnownMessageType =
    messageType === "event" ||
    messageType === "exception" ||
    messageType === "error";
  if (!isKnownMessageType) {
    // Amazon says this can't ever happen...
    log.error({ message }, "Received very bad AWS stream event");
    return null;
  }

  if (
    messageType === "event" &&
    contentType === "application/json" &&
    eventType === "chunk"
  ) {
    const bodyStr = this.textDecoder.decode(body);
    const { bytes } = JSON.parse(bodyStr);
    const event = Buffer.from(bytes, "base64").toString("utf8");
    return ["event: completion", `data: ${event}`].join("\n");
  }

  // Everything else is an exception, an error, or an event that is not a
  // JSON chunk (which will be something very weird).
  const type = String(exceptionType || errorCode || "UnknownError");

  // Compare case-insensitively: the previous parser-based implementation
  // matched "throttlingException", so an exact "ThrottlingException" check
  // could send throttles to the generic error path and skip the retry.
  if (type.toLowerCase() === "throttlingexception") {
    log.warn(
      { message, type },
      "AWS request throttled after streaming has already started; retrying"
    );
    throw new RetryableError("AWS request throttled mid-stream");
  }

  log.error({ message, type }, "Received bad AWS stream event");
  return makeCompletionSSE({
    format: "anthropic",
    title: "Proxy stream error",
    message:
      "The proxy received an unrecognized error from AWS while streaming.",
    obj: message,
    reqId: "proxy-sse-adapter-message",
    model: "",
  });
}
|
||||
|
||||
@@ -121,7 +131,7 @@ export class SSEStreamAdapter extends Transform {
|
||||
_transform(chunk: Buffer, _encoding: BufferEncoding, callback: Function) {
|
||||
try {
|
||||
if (this.isAwsStream) {
|
||||
this.awsParser.write(chunk);
|
||||
this.awsDecoder.write(chunk);
|
||||
} else if (this.isGoogleStream) {
|
||||
this.jsonParser.write(chunk);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user