Default to Claude 2.1 instead of Claude 1.3 in the OpenAI-compatible endpoint, since 1.3 is not accessible on all keys
This commit is contained in:
+4
-2
@@ -5,6 +5,9 @@
|
||||
# All values have reasonable defaults, so you only need to change the ones you
|
||||
# want to override.
|
||||
|
||||
# Use production mode unless you are developing locally.
|
||||
NODE_ENV=production
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# General settings:
|
||||
|
||||
@@ -112,8 +115,7 @@
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Secrets and keys:
|
||||
# Do not put any passwords or API keys directly in this file.
|
||||
# For Huggingface, set them via the Secrets section in your Space's config UI.
|
||||
# For Huggingface, set them via the Secrets section in your Space's config UI. Do not set them in .env.
|
||||
# For Render, create a "secret file" called .env using the Environment tab.
|
||||
|
||||
# You can add multiple API keys by separating them with a comma.
|
||||
|
||||
+1
-10
@@ -173,16 +173,7 @@ anthropicRouter.post(
|
||||
function maybeReassignModel(req: Request) {
|
||||
const model = req.body.model;
|
||||
if (!model.startsWith("gpt-")) return;
|
||||
|
||||
const bigModel = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
|
||||
const contextSize = req.promptTokens! + req.outputTokens!;
|
||||
if (contextSize > 8500) {
|
||||
req.log.debug(
|
||||
{ model: bigModel, contextSize },
|
||||
"Using Claude 100k model for OpenAI-to-Anthropic request"
|
||||
);
|
||||
req.body.model = bigModel;
|
||||
}
|
||||
req.body.model = "claude-2.1";
|
||||
}
|
||||
|
||||
export const anthropic = anthropicRouter;
|
||||
|
||||
@@ -279,11 +279,7 @@ function openaiToAnthropic(req: Request) {
|
||||
stops = [...new Set(stops)];
|
||||
|
||||
return {
|
||||
// Model may be overridden in `calculate-context-size.ts` to avoid having
|
||||
// a circular dependency (`calculate-context-size.ts` needs an already-
|
||||
// transformed request body to count tokens, but this function would like
|
||||
// to know the count to select a model).
|
||||
model: process.env.CLAUDE_SMALL_MODEL || "claude-v1",
|
||||
model: rest.model,
|
||||
prompt: prompt,
|
||||
max_tokens_to_sample: rest.max_tokens,
|
||||
stop_sequences: stops,
|
||||
|
||||
@@ -37,6 +37,7 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||
break;
|
||||
case "mistral-ai":
|
||||
proxyMax = MISTRAL_AI_MAX_CONTENT;
|
||||
break;
|
||||
case "openai-image":
|
||||
return;
|
||||
default:
|
||||
|
||||
@@ -85,9 +85,6 @@ export class SSEStreamAdapter extends Transform {
|
||||
}
|
||||
} else {
|
||||
const { bytes } = payload;
|
||||
// technically this is a transformation but we don't really distinguish
|
||||
// between aws claude and anthropic claude at the APIFormat level, so
|
||||
// these will short circuit the message transformer
|
||||
return [
|
||||
"event: completion",
|
||||
`data: ${Buffer.from(bytes, "base64").toString("utf8")}`,
|
||||
@@ -95,8 +92,7 @@ export class SSEStreamAdapter extends Transform {
|
||||
}
|
||||
}
|
||||
|
||||
// Google doesn't use event streams and just sends elements in an array over
|
||||
// a long-lived HTTP connection. Needs stream-json to parse the array.
|
||||
/** Processes an incoming array element from the Google AI JSON stream. */
|
||||
protected processGoogleValue(value: any): string | null {
|
||||
try {
|
||||
const candidates = value.candidates ?? [{}];
|
||||
|
||||
+1
-3
@@ -163,9 +163,7 @@ const openaiProxy = createQueueMiddleware({
|
||||
selfHandleResponse: true,
|
||||
logger,
|
||||
on: {
|
||||
proxyReq: createOnProxyReqHandler({
|
||||
pipeline: [addKey, finalizeBody],
|
||||
}),
|
||||
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
|
||||
proxyRes: createOnProxyResHandler([openaiResponseHandler]),
|
||||
error: handleProxyError,
|
||||
},
|
||||
|
||||
+1
-1
@@ -527,7 +527,7 @@ function monitorHeartbeat(req: Request) {
|
||||
if (bytesSinceLast < minBytes) {
|
||||
req.log.warn(
|
||||
{ minBytes, bytesSinceLast },
|
||||
"Queued request is processing heartbeats enough data or server is overloaded; killing connection."
|
||||
"Queued request is not processing heartbeats enough data or server is overloaded; killing connection."
|
||||
);
|
||||
res.destroy();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user