default to claude-2.1 instead of claude-1.3 in the OpenAI-compat endpoint, since 1.3 is not accessible on all keys

This commit is contained in:
nai-degen
2024-01-17 02:36:21 -06:00
parent 4c2a2c1e6c
commit e813cd9d22
7 changed files with 10 additions and 26 deletions
+1 -10
View File
@@ -173,16 +173,7 @@ anthropicRouter.post(
function maybeReassignModel(req: Request) {
const model = req.body.model;
if (!model.startsWith("gpt-")) return;
const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
const contextSize = req.promptTokens! + req.outputTokens!;
if (contextSize > 8500) {
req.log.debug(
{ model: bigModel, contextSize },
"Using Claude 100k model for OpenAI-to-Anthropic request"
);
req.body.model = bigModel;
}
req.body.model = "claude-2.1";
}
export const anthropic = anthropicRouter;
@@ -279,11 +279,7 @@ function openaiToAnthropic(req: Request) {
stops = [...new Set(stops)];
return {
// Model may be overridden in `calculate-context-size.ts` to avoid having
// a circular dependency (`calculate-context-size.ts` needs an already-
// transformed request body to count tokens, but this function would like
// to know the count to select a model).
model: process.env.CLAUDE_SMALL_MODEL || "claude-v1",
model: rest.model,
prompt: prompt,
max_tokens_to_sample: rest.max_tokens,
stop_sequences: stops,
@@ -37,6 +37,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
break;
case "mistral-ai":
proxyMax = MISTRAL_AI_MAX_CONTENT;
break;
case "openai-image":
return;
default:
@@ -85,9 +85,6 @@ export class SSEStreamAdapter extends Transform {
}
} else {
const { bytes } = payload;
// technically this is a transformation but we don't really distinguish
// between aws claude and anthropic claude at the APIFormat level, so
// these will short circuit the message transformer
return [
"event: completion",
`data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
@@ -95,8 +92,7 @@ export class SSEStreamAdapter extends Transform {
}
}
// Google doesn't use event streams and just sends elements in an array over
// a long-lived HTTP connection. Needs stream-json to parse the array.
/** Processes an incoming array element from the Google AI JSON stream. */
protected processGoogleValue(value: any): string | null {
try {
const candidates = value.candidates ?? [{}];
+1 -3
View File
@@ -163,9 +163,7 @@ const openaiProxy = createQueueMiddleware({
selfHandleResponse: true,
logger,
on: {
proxyReq: createOnProxyReqHandler({
pipeline: [addKey, finalizeBody],
}),
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
proxyRes: createOnProxyResHandler([openaiResponseHandler]),
error: handleProxyError,
},
+1 -1
View File
@@ -527,7 +527,7 @@ function monitorHeartbeat(req: Request) {
if (bytesSinceLast < minBytes) {
req.log.warn(
{ minBytes, bytesSinceLast },
"Queued request is processing heartbeats enough data or server is overloaded; killing connection."
"Queued request is not processing heartbeats enough data or server is overloaded; killing connection."
);
res.destroy();
}