returns clearer proxy_note hint on AWS 503 error

This commit is contained in:
nai-degen
2024-09-09 09:56:18 -05:00
parent 2a6f85e2e2
commit ac1897fd17
3 changed files with 18 additions and 4 deletions
+14
View File
@@ -358,6 +358,20 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
default:
assertNever(service);
}
} else if (statusCode === 503) {
switch (service) {
case "aws":
if (
errorPayload.error?.type === "ServiceUnavailableException" &&
errorPayload.error?.message?.match(/too many connections/i)
) {
errorPayload.proxy_note = `The requested AWS Bedrock model is overloaded. Try again in a few minutes, or try another model.`;
}
break;
default:
errorPayload.proxy_note = `Upstream service unavailable. Try again later.`;
break;
}
} else {
errorPayload.proxy_note = `Unrecognized error from upstream service.`;
}
+1 -1
View File
@@ -29,7 +29,7 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
* Upon being rate limited, a key will be locked out for this many milliseconds
* while we wait for other concurrent requests to finish.
*/
const RATE_LIMIT_LOCKOUT = 4000;
const RATE_LIMIT_LOCKOUT = 5000;
/**
* Upon assigning a key, we will wait this many milliseconds before allowing it
* to be used again. This is to prevent the queue from flooding a key with too
+3 -3
View File
@@ -20,13 +20,13 @@ export function prioritizeKeys<T extends Key>(
const now = Date.now();
return keys.sort((a, b) => {
const aRateLimited = now - a.rateLimitedAt < a.rateLimitedUntil;
const bRateLimited = now - b.rateLimitedAt < b.rateLimitedUntil;
const aRateLimited = now < a.rateLimitedUntil;
const bRateLimited = now < b.rateLimitedUntil;
if (aRateLimited && !bRateLimited) return 1;
if (!aRateLimited && bRateLimited) return -1;
if (aRateLimited && bRateLimited) {
return a.rateLimitedAt - b.rateLimitedAt;
return a.rateLimitedUntil - b.rateLimitedUntil;
}
if (customComparator) {