fixes broken AWS rate limit backoff

This commit is contained in:
nai-degen
2023-10-24 09:19:46 -05:00
parent 89e9b67f3f
commit 26dc79c8f1
9 changed files with 45 additions and 43 deletions
@@ -153,11 +153,6 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
const selectedKey = keysByPriority[0];
selectedKey.lastUsed = now;
selectedKey.rateLimitedAt = now;
// Intended to throttle the queue processor as otherwise it will just
// flood the API with requests and we want to wait a sec to see if we're
// going to get a rate limit error on this key.
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
return { ...selectedKey };
}
@@ -226,4 +221,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
});
this.checker?.scheduleNextCheck();
}
/**
 * Briefly marks the key with the given hash as rate limited.
 *
 * Records the current time as the moment the key became rate limited and
 * keeps it locked out until `KEY_REUSE_DELAY` milliseconds from now.
 *
 * @param hash Hash of the key to throttle. It is expected to match one of
 *   this provider's keys; a missing key is not handled here.
 */
public throttle(hash: string) {
  const target = this.keys.find((candidate) => candidate.hash === hash)!;
  const timestamp = Date.now();
  target.rateLimitedAt = timestamp;
  target.rateLimitedUntil = timestamp + KEY_REUSE_DELAY;
}
}
+9 -7
View File
@@ -37,13 +37,13 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
* Upon being rate limited, a key will be locked out for this many milliseconds
* while we wait for other concurrent requests to finish.
*/
const RATE_LIMIT_LOCKOUT = 1000;
const RATE_LIMIT_LOCKOUT = 4000;
/**
 * Upon assigning a key, we will wait this many milliseconds before allowing it
 * to be used again. This is to prevent the queue from flooding a key with too
 * many requests while we wait to learn whether previous ones succeeded.
 *
 * The provider's `throttle` method applies this delay by setting the key's
 * `rateLimitedUntil` to the current time plus this value.
 */
const KEY_REUSE_DELAY = 500;
const KEY_REUSE_DELAY = 250;
export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
readonly service = "aws";
@@ -131,11 +131,6 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
const selectedKey = keysByPriority[0];
selectedKey.lastUsed = now;
selectedKey.rateLimitedAt = now;
// Intended to throttle the queue processor as otherwise it will just
// flood the API with requests and we want to wait a sec to see if we're
// going to get a rate limit error on this key.
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
return { ...selectedKey };
}
@@ -199,4 +194,11 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
this.update(hash, { lastChecked: 0, isDisabled: false })
);
}
/**
 * Temporarily locks out the key identified by `hash`.
 *
 * Stamps the key as rate limited at the current time and sets its lockout
 * to expire `KEY_REUSE_DELAY` milliseconds later.
 *
 * @param hash Hash of the key to throttle. The lookup result is used without
 *   a missing-key check, so the hash must belong to this provider.
 */
public throttle(hash: string) {
  const matched = this.keys.find((entry) => entry.hash === hash)!;
  const startedAt = Date.now();
  matched.rateLimitedAt = startedAt;
  matched.rateLimitedUntil = startedAt + KEY_REUSE_DELAY;
}
}
+2 -1
View File
@@ -63,6 +63,7 @@ export interface KeyProvider<T extends Key = Key> {
getLockoutPeriod(model: Model): number;
markRateLimited(hash: string): void;
recheck(): void;
throttle(hash: string): void;
}
export const keyPool = new KeyPool();
@@ -80,4 +81,4 @@ export {
export { AnthropicKey } from "./anthropic/provider";
export { OpenAIKey } from "./openai/provider";
export { GooglePalmKey } from "./palm/provider";
export { AwsBedrockKey } from "./aws/provider";
export { AwsBedrockKey } from "./aws/provider";
+5
View File
@@ -72,6 +72,11 @@ export class KeyPool {
}, 0);
}
/**
 * Forwards a throttle request to the provider that owns the given key.
 *
 * The provider is resolved from `key.service` and is then asked to throttle
 * the key identified by `key.hash`.
 *
 * @param key Key to throttle; only its `service` and `hash` are read.
 */
public throttle(key: Key) {
  this.getKeyProvider(key.service).throttle(key.hash);
}
public incrementUsage(key: Key, model: string, tokens: number): void {
const provider = this.getKeyProvider(key.service);
provider.incrementUsage(key.hash, model, tokens);
+10 -23
View File
@@ -221,15 +221,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
const selectedKey = keysByPriority[0];
selectedKey.lastUsed = now;
// When a key is selected, we rate-limit it for a brief period of time to
// prevent the queue processor from immediately flooding it with requests
// while the initial request is still being processed (which is when we will
// get new rate limit headers).
// Instead, we will let a request through every second until the key
// becomes fully saturated and locked out again.
selectedKey.rateLimitedAt = now;
selectedKey.rateLimitRequestsReset = KEY_REUSE_DELAY;
return { ...selectedKey };
}
@@ -383,20 +374,16 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
this.checker?.scheduleNextCheck();
}
/** Writes key status to disk. */
// public writeKeyStatus() {
// const keys = this.keys.map((key) => ({
// key: key.key,
// isGpt4: key.isGpt4,
// usage: key.usage,
// hardLimit: key.hardLimit,
// isDisabled: key.isDisabled,
// }));
// fs.writeFileSync(
// path.join(__dirname, "..", "keys.json"),
// JSON.stringify(keys, null, 2)
// );
// }
/**
 * Briefly disables a key that has just been picked for a request, so the
 * API is not spammed with further requests while we are still waiting to
 * find out whether that key is already rate limited.
 *
 * Sets `rateLimitedAt` to the current time and `rateLimitRequestsReset` to
 * `KEY_REUSE_DELAY`.
 *
 * @param hash Hash of the key to throttle. It is expected to match one of
 *   this provider's keys; a missing key is not handled here.
 */
public throttle(hash: string) {
  const selected = this.keys.find((candidate) => candidate.hash === hash)!;
  selected.rateLimitedAt = Date.now();
  selected.rateLimitRequestsReset = KEY_REUSE_DELAY;
}
}
/**
+7 -5
View File
@@ -122,11 +122,6 @@ export class GooglePalmKeyProvider implements KeyProvider<GooglePalmKey> {
const selectedKey = keysByPriority[0];
selectedKey.lastUsed = now;
selectedKey.rateLimitedAt = now;
// Intended to throttle the queue processor as otherwise it will just
// flood the API with requests and we want to wait a sec to see if we're
// going to get a rate limit error on this key.
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
return { ...selectedKey };
}
@@ -186,4 +181,11 @@ export class GooglePalmKeyProvider implements KeyProvider<GooglePalmKey> {
}
/** No-op: this provider's `recheck` has an empty body and performs no work. */
public recheck() {}
/**
 * Puts the key with the given hash into a short lockout.
 *
 * The key's `rateLimitedAt` is set to the current time and its
 * `rateLimitedUntil` to that time plus `KEY_REUSE_DELAY` milliseconds.
 *
 * @param hash Hash of the key to throttle. No handling exists for a hash
 *   that matches none of this provider's keys.
 */
public throttle(hash: string) {
  const found = this.keys.find((item) => item.hash === hash)!;
  const current = Date.now();
  found.rateLimitedAt = current;
  found.rateLimitedUntil = current + KEY_REUSE_DELAY;
}
}