replaces lifion-aws-event-stream with @smithy/eventstream-codec to (hopefully) fix interrupted AWS streams

nai-degen
2024-02-04 17:18:28 -06:00
parent c88f47d0ed
commit 98cea2da02
4 changed files with 188 additions and 121 deletions
package-lock.json +24 -65
@@ -11,9 +11,11 @@
"dependencies": {
"@anthropic-ai/tokenizer": "^0.0.4",
"@aws-crypto/sha256-js": "^5.1.0",
"@smithy/eventstream-codec": "^2.1.1",
"@smithy/protocol-http": "^3.0.6",
"@smithy/signature-v4": "^2.0.10",
"@smithy/types": "^2.3.4",
"@smithy/util-utf8": "^2.1.1",
"axios": "^1.3.5",
"check-disk-space": "^3.4.0",
"cookie-parser": "^1.4.6",
@@ -27,7 +29,6 @@
"firebase-admin": "^11.10.1",
"googleapis": "^122.0.0",
"http-proxy-middleware": "^3.0.0-beta.1",
"lifion-aws-event-stream": "^1.0.7",
"memorystore": "^1.6.7",
"multer": "^1.4.5-lts.1",
"node-schedule": "^2.1.1",
@@ -837,20 +838,20 @@
"optional": true
},
"node_modules/@smithy/eventstream-codec": {
"version": "2.0.10",
"resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-2.0.10.tgz",
"integrity": "sha512-3SSDgX2nIsFwif6m+I4+ar4KDcZX463Noes8ekBgQHitULiWvaDZX8XqPaRQSQ4bl1vbeVXHklJfv66MnVO+lw==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-2.1.1.tgz",
"integrity": "sha512-E8KYBxBIuU4c+zrpR22VsVrOPoEDzk35bQR3E+xm4k6Pa6JqzkDOdMyf9Atac5GPNKHJBdVaQ4JtjdWX2rl/nw==",
"dependencies": {
"@aws-crypto/crc32": "3.0.0",
"@smithy/types": "^2.3.4",
"@smithy/util-hex-encoding": "^2.0.0",
"@smithy/types": "^2.9.1",
"@smithy/util-hex-encoding": "^2.1.1",
"tslib": "^2.5.0"
}
},
"node_modules/@smithy/is-array-buffer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.0.0.tgz",
"integrity": "sha512-z3PjFjMyZNI98JFRJi/U0nGoLWMSJlDjAW4QUX2WNZLas5C0CmVV6LJ01JI0k90l7FvpmixjWxPFmENSClQ7ug==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.1.1.tgz",
"integrity": "sha512-xozSQrcUinPpNPNPds4S7z/FakDTh1MZWtRP/2vQtYB/u3HYrX2UXuZs+VhaKBd6Vc7g2XPr2ZtwGBNDN6fNKQ==",
"dependencies": {
"tslib": "^2.5.0"
},
@@ -889,9 +890,9 @@
}
},
"node_modules/@smithy/types": {
"version": "2.3.4",
"resolved": "https://registry.npmjs.org/@smithy/types/-/types-2.3.4.tgz",
"integrity": "sha512-D7xlM9FOMFyFw7YnMXn9dK2KuN6+JhnrZwVt1fWaIu8hCk5CigysweeIT/H/nCo4YV+s8/oqUdLfexbkPZtvqw==",
"version": "2.9.1",
"resolved": "https://registry.npmjs.org/@smithy/types/-/types-2.9.1.tgz",
"integrity": "sha512-vjXlKNXyprDYDuJ7UW5iobdmyDm6g8dDG+BFUncAg/3XJaN45Gy5RWWWUVgrzIK7S4R1KWgIX5LeJcfvSI24bw==",
"dependencies": {
"tslib": "^2.5.0"
},
@@ -900,11 +901,11 @@
}
},
"node_modules/@smithy/util-buffer-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.0.0.tgz",
"integrity": "sha512-/YNnLoHsR+4W4Vf2wL5lGv0ksg8Bmk3GEGxn2vEQt52AQaPSCuaO5PM5VM7lP1K9qHRKHwrPGktqVoAHKWHxzw==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.1.1.tgz",
"integrity": "sha512-clhNjbyfqIv9Md2Mg6FffGVrJxw7bgK7s3Iax36xnfVj6cg0fUG7I4RH0XgXJF8bxi+saY5HR21g2UPKSxVCXg==",
"dependencies": {
"@smithy/is-array-buffer": "^2.0.0",
"@smithy/is-array-buffer": "^2.1.1",
"tslib": "^2.5.0"
},
"engines": {
@@ -912,9 +913,9 @@
}
},
"node_modules/@smithy/util-hex-encoding": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-2.0.0.tgz",
"integrity": "sha512-c5xY+NUnFqG6d7HFh1IFfrm3mGl29lC+vF+geHv4ToiuJCBmIfzx6IeHLg+OgRdPFKDXIw6pvi+p3CsscaMcMA==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-2.1.1.tgz",
"integrity": "sha512-3UNdP2pkYUUBGEXzQI9ODTDK+Tcu1BlCyDBaRHwyxhA+8xLP8agEKQq4MGmpjqb4VQAjq9TwlCQX0kP6XDKYLg==",
"dependencies": {
"tslib": "^2.5.0"
},
@@ -946,11 +947,11 @@
}
},
"node_modules/@smithy/util-utf8": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.0.0.tgz",
"integrity": "sha512-rctU1VkziY84n5OXe3bPNpKR001ZCME2JCaBBFgtiM2hfKbHFudc/BkMuPab8hRbLd0j3vbnBTTZ1igBf0wgiQ==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.1.1.tgz",
"integrity": "sha512-BqTpzYEcUMDwAKr7/mVRUtHDhs6ZoXDi9NypMvMfOr/+u1NW7JgqodPDECiiLboEm6bobcPcECxzjtQh865e9A==",
"dependencies": {
"@smithy/util-buffer-from": "^2.0.0",
"@smithy/util-buffer-from": "^2.1.1",
"tslib": "^2.5.0"
},
"engines": {
@@ -2022,37 +2023,6 @@
"node": ">= 0.10"
}
},
"node_modules/crc": {
"version": "3.8.0",
"resolved": "https://registry.npmjs.org/crc/-/crc-3.8.0.tgz",
"integrity": "sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ==",
"dependencies": {
"buffer": "^5.1.0"
}
},
"node_modules/crc/node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/create-require": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz",
@@ -3699,17 +3669,6 @@
"graceful-fs": "^4.1.9"
}
},
"node_modules/lifion-aws-event-stream": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/lifion-aws-event-stream/-/lifion-aws-event-stream-1.0.7.tgz",
"integrity": "sha512-qI0O85OrV5A9rBE++oIaWFjNngk/BqjnJ+3/wdtIPLfFWhPtf+xNuWd/T8lr/wnEpKm/8HbdgYf8pKozk0dPAw==",
"dependencies": {
"crc": "^3.8.0"
},
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/limiter": {
"version": "1.1.5",
"resolved": "https://registry.npmjs.org/limiter/-/limiter-1.1.5.tgz",
package.json +2 -1
@@ -19,9 +19,11 @@
"dependencies": {
"@anthropic-ai/tokenizer": "^0.0.4",
"@aws-crypto/sha256-js": "^5.1.0",
"@smithy/eventstream-codec": "^2.1.1",
"@smithy/protocol-http": "^3.0.6",
"@smithy/signature-v4": "^2.0.10",
"@smithy/types": "^2.3.4",
"@smithy/util-utf8": "^2.1.1",
"axios": "^1.3.5",
"check-disk-space": "^3.4.0",
"cookie-parser": "^1.4.6",
@@ -35,7 +37,6 @@
"firebase-admin": "^11.10.1",
"googleapis": "^122.0.0",
"http-proxy-middleware": "^3.0.0-beta.1",
"lifion-aws-event-stream": "^1.0.7",
"memorystore": "^1.6.7",
"multer": "^1.4.5-lts.1",
"node-schedule": "^2.1.1",
aws-eventstream-decoder.ts +97 -0
@@ -0,0 +1,97 @@
import { Transform, TransformOptions } from "stream";
import {
EventStreamCodec,
Message,
MessageDecoderStream,
} from "@smithy/eventstream-codec";
import { fromUtf8, toUtf8 } from "@smithy/util-utf8";
import { logger } from "../../../../logger";
const log = logger.child({ module: "aws-eventstream-decoder" });
/**
* Consumes an HTTP response stream and transforms it into a decoded stream of
* AWS vnd.amazon.eventstream messages.
*
* The AWS library uses async iterators, so this class needs to act as a bridge
* between the async generator and the Node stream API for downstream consumers.
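 *
 * Wire format note: each vnd.amazon.eventstream message is length-prefixed
 * (total byte length, headers length, prelude CRC), followed by the headers,
 * the payload, and a trailing message CRC; the codec below handles all of the
 * framing and checksum validation.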
*/
export class AWSEventStreamDecoder extends Transform {
private readonly decoder: EventStreamCodec;
private messageStream: MessageDecoderStream | null = null;
  // Chunks awaiting the decoder; a `null` entry is the end-of-stream sentinel.
  private queue: (Uint8Array | null)[] = [];
  private resolveChunk: ((value: Uint8Array | null) => void) | null = null;
  // Settles once the decode loop has consumed the entire input stream.
  private decodeDone: Promise<void> = Promise.resolve();
constructor(options?: TransformOptions) {
super({ ...options, objectMode: true });
this.decoder = new EventStreamCodec(toUtf8, fromUtf8);
this.setupStream();
}
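  // enqueueChunk/dequeueChunk form a single-consumer handoff between the Node
  // writable side and the async generator: if the generator is already
  // awaiting a chunk, hand it over immediately; otherwise buffer it until the
  // generator asks for it.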
protected enqueueChunk(chunk: Uint8Array) {
if (this.resolveChunk) {
this.resolveChunk(chunk);
this.resolveChunk = null;
} else {
this.queue.push(chunk);
}
}
  protected dequeueChunk(): Promise<Uint8Array | null> {
    if (this.queue.length > 0) {
      return Promise.resolve(this.queue.shift() ?? null);
    }
    return new Promise((resolve) => (this.resolveChunk = resolve));
  }
protected setupStream() {
const that = this;
// This generator wraps the response stream (via the chunk queue) in an
// async iterable that can be consumed by the Amazon EventStream library.
const inputGenerator = (async function* () {
while (true) {
const chunk = await that.dequeueChunk();
if (chunk === null) break;
yield chunk;
}
log.debug("Input stream generator finished");
});
// MessageDecoderStream is an async iterator that consumes chunks from
// inputGenerator and yields fully decoded individual messages.
this.messageStream = new MessageDecoderStream({
decoder: this.decoder,
inputStream: inputGenerator(),
});
    // Start the generator and push messages downstream as they are decoded.
    // The promise is retained so _flush can wait for the decoder to drain;
    // the stream machinery emits EOF once _flush's callback fires, so the
    // loop itself must not push(null).
    let lastMessage: Message | null = null;
    this.decodeDone = (async function () {
      try {
        log.debug("Starting generator");
        for await (const message of that.messageStream!) {
          lastMessage = message;
          that.push(message);
        }
      } catch (err) {
        log.error({ err, lastMessage }, "Error decoding eventstream message");
        that.emit("error", err);
      }
    })();
}
  _transform(chunk: Buffer, _encoding: BufferEncoding, callback: () => void) {
this.enqueueChunk(chunk);
callback();
}
  _flush(callback: (error?: Error | null) => void) {
    log.debug("Received end of stream; stopping generator");
    // Deliver the end-of-stream sentinel even if the generator is not
    // currently awaiting a chunk, then let it drain any remaining buffered
    // messages before completing the flush.
    if (this.resolveChunk) {
      this.resolveChunk(null);
      this.resolveChunk = null;
    } else {
      this.queue.push(null);
    }
    this.decodeDone.then(() => callback());
  }
}
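For reference, a minimal sketch of how the decoder is consumed; `response` here is a hypothetical readable stream of raw vnd.amazon.eventstream bytes and is not part of this commit:

import { Message } from "@smithy/eventstream-codec";
import { AWSEventStreamDecoder } from "./aws-eventstream-decoder";

declare const response: NodeJS.ReadableStream; // hypothetical upstream source

const decoder = new AWSEventStreamDecoder();
decoder.on("data", (message: Message) => {
  // Messages arrive fully decoded, with typed headers and a byte body.
  console.log(message.headers[":event-type"]?.value);
});
decoder.on("error", (err) => console.error("decode failed", err));
response.pipe(decoder);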
sse-stream-adapter.ts +65 -55
@@ -1,13 +1,12 @@
import { Transform, TransformOptions } from "stream";
import { StringDecoder } from "string_decoder";
// @ts-ignore
import { Parser } from "lifion-aws-event-stream";
import { logger } from "../../../../logger";
import { RetryableError } from "../index";
import { APIFormat } from "../../../../shared/key-management";
import { Message } from "@smithy/eventstream-codec";
import StreamArray from "stream-json/streamers/StreamArray";
import { StringDecoder } from "string_decoder";
import { logger } from "../../../../logger";
import { APIFormat } from "../../../../shared/key-management";
import { makeCompletionSSE } from "../../../../shared/streaming";
import { RetryableError } from "../index";
import { AWSEventStreamDecoder } from "./aws-eventstream-decoder";
const log = logger.child({ module: "sse-stream-adapter" });
@@ -15,25 +14,19 @@ type SSEStreamAdapterOptions = TransformOptions & {
contentType?: string;
api: APIFormat;
};
type AwsEventStreamMessage = {
headers: {
":message-type": "event" | "exception";
":exception-type"?: string;
};
payload: { message?: string /** base64 encoded */; bytes?: string };
};
/**
* Receives either text chunks or AWS binary event stream chunks and emits
* full SSE events.
* Receives either text chunks or AWS vnd.amazon.eventstream messages and emits
* full SSE-compliant messages.
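 *
 * An emitted SSE message looks like the following (payload illustrative):
 *   event: completion
 *   data: {"completion":" Hello","stop_reason":null}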
*/
export class SSEStreamAdapter extends Transform {
private readonly isAwsStream;
private readonly isGoogleStream;
private awsParser = new Parser();
private awsDecoder = new AWSEventStreamDecoder();
private jsonParser = StreamArray.withParser();
private partialMessage = "";
private decoder = new StringDecoder("utf8");
private textDecoder = new TextDecoder("utf8");
constructor(options?: SSEStreamAdapterOptions) {
super(options);
@@ -41,10 +34,14 @@ export class SSEStreamAdapter extends Transform {
options?.contentType === "application/vnd.amazon.eventstream";
this.isGoogleStream = options?.api === "google-ai";
this.awsParser.on("data", (data: AwsEventStreamMessage) => {
const message = this.processAwsEvent(data);
if (message) {
this.push(Buffer.from(message + "\n\n"), "utf8");
this.awsDecoder.on("data", (data: Message) => {
try {
const message = this.processAwsEvent(data);
if (message) {
this.push(Buffer.from(message + "\n\n"), "utf8");
}
} catch (error) {
this.emit("error", error);
}
});
@@ -56,39 +53,52 @@ export class SSEStreamAdapter extends Transform {
});
}
protected processAwsEvent(event: AwsEventStreamMessage): string | null {
const { payload, headers } = event;
if (headers[":message-type"] === "exception" || !payload.bytes) {
const eventStr = JSON.stringify(event);
// Under high load, AWS can rugpull us by returning a 200 and starting the
// stream but then immediately sending a rate limit error as the first
// event. My guess is some race condition in their rate limiting check
// that occurs if two requests arrive at the same time when only one
// concurrency slot is available.
if (headers[":exception-type"] === "throttlingException") {
log.warn(
{ event: eventStr },
"AWS request throttled after streaming has already started; retrying"
);
throw new RetryableError("AWS request throttled mid-stream");
} else {
log.error({ event: eventStr }, "Received bad AWS stream event");
return makeCompletionSSE({
format: "anthropic",
title: "Proxy stream error",
message:
"The proxy received malformed or unexpected data from AWS while streaming.",
obj: event,
reqId: "proxy-sse-adapter-message",
model: "",
});
}
} else {
const { bytes } = payload;
return [
"event: completion",
`data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
].join("\n");
protected processAwsEvent(message: Message): string | null {
    // Per Amazon, headers and body are always present; headers is an object,
    // body is a Uint8Array, potentially zero-length.
const { headers, body } = message;
const eventType = headers[":event-type"]?.value;
const messageType = headers[":message-type"]?.value;
const contentType = headers[":content-type"]?.value;
const exceptionType = headers[":exception-type"]?.value;
const errorCode = headers[":error-code"]?.value;
const bodyStr = this.textDecoder.decode(body);
switch (messageType) {
case "event":
if (contentType === "application/json" && eventType === "chunk") {
const { bytes } = JSON.parse(bodyStr);
const event = Buffer.from(bytes, "base64").toString("utf8");
return ["event: completion", `data: ${event}`].join(`\n`);
}
        // Intentional fallthrough; a non-JSON "event" message is unexpected
        // and is treated as an error below.
// noinspection FallThroughInSwitchStatementJS
case "exception":
case "error":
const type = exceptionType || errorCode || "UnknownError";
switch (type) {
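          // AWS can return a 200 and open the stream, then send a throttling
          // error as the very first event; raising RetryableError lets the
          // proxy retry the request instead of surfacing the error.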
case "ThrottlingException":
log.warn(
{ message, type },
"AWS request throttled after streaming has already started; retrying"
);
throw new RetryableError("AWS request throttled mid-stream");
default:
log.error({ message, type }, "Received bad AWS stream event");
return makeCompletionSSE({
format: "anthropic",
title: "Proxy stream error",
message:
"The proxy received an unrecognized error from AWS while streaming.",
obj: message,
reqId: "proxy-sse-adapter-message",
model: "",
});
}
default:
// Amazon says this can't ever happen...
log.error({ message }, "Received very bad AWS stream event");
return null;
}
}
@@ -121,7 +131,7 @@ export class SSEStreamAdapter extends Transform {
_transform(chunk: Buffer, _encoding: BufferEncoding, callback: Function) {
try {
if (this.isAwsStream) {
this.awsParser.write(chunk);
this.awsDecoder.write(chunk);
} else if (this.isGoogleStream) {
this.jsonParser.write(chunk);
} else {