fixes broken AWS rate limit backoff
This commit is contained in:
@@ -153,11 +153,6 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||
|
||||
const selectedKey = keysByPriority[0];
|
||||
selectedKey.lastUsed = now;
|
||||
selectedKey.rateLimitedAt = now;
|
||||
// Intended to throttle the queue processor as otherwise it will just
|
||||
// flood the API with requests and we want to wait a sec to see if we're
|
||||
// going to get a rate limit error on this key.
|
||||
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
return { ...selectedKey };
|
||||
}
|
||||
|
||||
@@ -226,4 +221,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||
});
|
||||
this.checker?.scheduleNextCheck();
|
||||
}
|
||||
|
||||
public throttle(hash: string) {
|
||||
const key = this.keys.find((k) => k.hash === hash)!;
|
||||
const now = Date.now();
|
||||
key.rateLimitedAt = now;
|
||||
key.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,13 +37,13 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
|
||||
* Upon being rate limited, a key will be locked out for this many milliseconds
|
||||
* while we wait for other concurrent requests to finish.
|
||||
*/
|
||||
const RATE_LIMIT_LOCKOUT = 1000;
|
||||
const RATE_LIMIT_LOCKOUT = 4000;
|
||||
/**
|
||||
* Upon assigning a key, we will wait this many milliseconds before allowing it
|
||||
* to be used again. This is to prevent the queue from flooding a key with too
|
||||
* many requests while we wait to learn whether previous ones succeeded.
|
||||
*/
|
||||
const KEY_REUSE_DELAY = 500;
|
||||
const KEY_REUSE_DELAY = 250;
|
||||
|
||||
export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
|
||||
readonly service = "aws";
|
||||
@@ -131,11 +131,6 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
|
||||
|
||||
const selectedKey = keysByPriority[0];
|
||||
selectedKey.lastUsed = now;
|
||||
selectedKey.rateLimitedAt = now;
|
||||
// Intended to throttle the queue processor as otherwise it will just
|
||||
// flood the API with requests and we want to wait a sec to see if we're
|
||||
// going to get a rate limit error on this key.
|
||||
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
return { ...selectedKey };
|
||||
}
|
||||
|
||||
@@ -199,4 +194,11 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
|
||||
this.update(hash, { lastChecked: 0, isDisabled: false })
|
||||
);
|
||||
}
|
||||
|
||||
public throttle(hash: string) {
|
||||
const key = this.keys.find((k) => k.hash === hash)!;
|
||||
const now = Date.now();
|
||||
key.rateLimitedAt = now;
|
||||
key.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,6 +63,7 @@ export interface KeyProvider<T extends Key = Key> {
|
||||
getLockoutPeriod(model: Model): number;
|
||||
markRateLimited(hash: string): void;
|
||||
recheck(): void;
|
||||
throttle(hash: string): void;
|
||||
}
|
||||
|
||||
export const keyPool = new KeyPool();
|
||||
@@ -80,4 +81,4 @@ export {
|
||||
export { AnthropicKey } from "./anthropic/provider";
|
||||
export { OpenAIKey } from "./openai/provider";
|
||||
export { GooglePalmKey } from "./palm/provider";
|
||||
export { AwsBedrockKey } from "./aws/provider";
|
||||
export { AwsBedrockKey } from "./aws/provider";
|
||||
|
||||
@@ -72,6 +72,11 @@ export class KeyPool {
|
||||
}, 0);
|
||||
}
|
||||
|
||||
public throttle(key: Key) {
|
||||
const provider = this.getKeyProvider(key.service);
|
||||
provider.throttle(key.hash);
|
||||
}
|
||||
|
||||
public incrementUsage(key: Key, model: string, tokens: number): void {
|
||||
const provider = this.getKeyProvider(key.service);
|
||||
provider.incrementUsage(key.hash, model, tokens);
|
||||
|
||||
@@ -221,15 +221,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
|
||||
const selectedKey = keysByPriority[0];
|
||||
selectedKey.lastUsed = now;
|
||||
|
||||
// When a key is selected, we rate-limit it for a brief period of time to
|
||||
// prevent the queue processor from immediately flooding it with requests
|
||||
// while the initial request is still being processed (which is when we will
|
||||
// get new rate limit headers).
|
||||
// Instead, we will let a request through every second until the key
|
||||
// becomes fully saturated and locked out again.
|
||||
selectedKey.rateLimitedAt = now;
|
||||
selectedKey.rateLimitRequestsReset = KEY_REUSE_DELAY;
|
||||
return { ...selectedKey };
|
||||
}
|
||||
|
||||
@@ -383,20 +374,16 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
this.checker?.scheduleNextCheck();
|
||||
}
|
||||
|
||||
/** Writes key status to disk. */
|
||||
// public writeKeyStatus() {
|
||||
// const keys = this.keys.map((key) => ({
|
||||
// key: key.key,
|
||||
// isGpt4: key.isGpt4,
|
||||
// usage: key.usage,
|
||||
// hardLimit: key.hardLimit,
|
||||
// isDisabled: key.isDisabled,
|
||||
// }));
|
||||
// fs.writeFileSync(
|
||||
// path.join(__dirname, "..", "keys.json"),
|
||||
// JSON.stringify(keys, null, 2)
|
||||
// );
|
||||
// }
|
||||
/**
|
||||
* Called when a key is selected for a request, briefly disabling it to
|
||||
* avoid spamming the API with requests while we wait to learn whether this
|
||||
* key is already rate limited.
|
||||
*/
|
||||
public throttle(hash: string) {
|
||||
const key = this.keys.find((k) => k.hash === hash)!;
|
||||
key.rateLimitedAt = Date.now();
|
||||
key.rateLimitRequestsReset = KEY_REUSE_DELAY;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -122,11 +122,6 @@ export class GooglePalmKeyProvider implements KeyProvider<GooglePalmKey> {
|
||||
|
||||
const selectedKey = keysByPriority[0];
|
||||
selectedKey.lastUsed = now;
|
||||
selectedKey.rateLimitedAt = now;
|
||||
// Intended to throttle the queue processor as otherwise it will just
|
||||
// flood the API with requests and we want to wait a sec to see if we're
|
||||
// going to get a rate limit error on this key.
|
||||
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
return { ...selectedKey };
|
||||
}
|
||||
|
||||
@@ -186,4 +181,11 @@ export class GooglePalmKeyProvider implements KeyProvider<GooglePalmKey> {
|
||||
}
|
||||
|
||||
public recheck() {}
|
||||
|
||||
public throttle(hash: string) {
|
||||
const key = this.keys.find((k) => k.hash === hash)!;
|
||||
const now = Date.now();
|
||||
key.rateLimitedAt = now;
|
||||
key.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user