Files
simple-proxy/src/proxy/gcp.ts
T
2026-04-06 03:59:37 -07:00

259 lines
7.2 KiB
TypeScript

import { Request, RequestHandler, Response, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { v4 } from "uuid";
import { config } from "../config";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
createPreprocessorMiddleware,
signGcpRequest,
finalizeSignedRequest,
createOnProxyReqHandler,
} from "./middleware/request";
import {
ProxyResHandlerWithBody,
createOnProxyResHandler,
} from "./middleware/response";
import { transformAnthropicChatResponseToOpenAI } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";
// GCP Vertex AI model IDs for Anthropic Claude models. Vertex uses the
// "name@version" format (e.g. "claude-sonnet-4-5@20250929") rather than
// Anthropic's native dashed names.
const GCP_CLAUDE_SONNET_45 = "claude-sonnet-4-5@20250929";
const GCP_CLAUDE_HAIKU_45 = "claude-haiku-4-5@20251001";
const GCP_CLAUDE_OPUS_41 = "claude-opus-4-1@20250805";
const GCP_CLAUDE_SONNET_4 = "claude-sonnet-4@20250514";
const GCP_CLAUDE_OPUS_4 = "claude-opus-4@20250514";
const GCP_CLAUDE_35_HAIKU = "claude-3-5-haiku@20241022";
// Module-level cache for the /v1/models response; refreshed by
// getModelsResponse() when older than one minute.
let modelsCache: any = null;
let modelsCacheTime = 0;
/**
 * Builds (and caches for one minute) an OpenAI-style model list describing
 * the Claude variants available via GCP. Returns an empty list when no GCP
 * credentials are configured.
 */
const getModelsResponse = () => {
  const now = Date.now();
  // Serve from cache while the cached payload is under a minute old.
  if (now - modelsCacheTime < 60 * 1000) {
    return modelsCache;
  }
  // Without credentials there are no models to advertise.
  if (!config.gcpCredentials) return { object: "list", data: [] };
  // https://docs.anthropic.com/en/docs/about-claude/models
  const variantIds = [
    GCP_CLAUDE_HAIKU_45,
    GCP_CLAUDE_SONNET_45,
    GCP_CLAUDE_OPUS_41,
    GCP_CLAUDE_35_HAIKU,
    GCP_CLAUDE_SONNET_4,
    GCP_CLAUDE_OPUS_4,
  ];
  const data = variantIds.map((id) => ({
    id,
    object: "model",
    created: Date.now(),
    owned_by: "anthropic",
    permission: [],
    root: "claude",
    parent: null,
  }));
  modelsCache = { object: "list", data };
  modelsCacheTime = Date.now();
  return modelsCache;
};
/** Express handler for GET /v1/models; replies with the cached model list. */
const handleModelRequest: RequestHandler = (_req, res) => {
  const payload = getModelsResponse();
  res.status(200).json(payload);
};
/**
 * Only used for non-streaming requests. Translates the upstream Anthropic
 * response back into the client's requested API format (currently only the
 * OpenAI-compatible shape needs conversion) and sends it to the client.
 */
const gcpResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  // `typeof null === "object"`, so check null explicitly — otherwise a null
  // body slips past this guard and crashes on `body.proxy` below with a
  // much less helpful TypeError.
  if (typeof body !== "object" || body === null) {
    throw new Error("Expected body to be an object");
  }
  let newBody = body;
  switch (`${req.inboundApi}<-${req.outboundApi}`) {
    case "openai<-anthropic-chat":
      req.log.info("Transforming Anthropic Chat back to OpenAI format");
      newBody = transformAnthropicChatResponseToOpenAI(body);
      break;
  }
  // Re-attach any proxy metadata from the original body in case the
  // transform above dropped it.
  res.status(200).json({ ...newBody, proxy: body.proxy });
};
// Queue-wrapped proxy middleware for GCP. Requests are signed before being
// proxied; the router callback derives the real upstream target from the
// signed request (the placeholder target is never used).
const gcpProxy = createQueueMiddleware({
  beforeProxy: signGcpRequest,
  proxyMiddleware: createProxyMiddleware({
    // Placeholder; the `router` callback below rewrites the target per-request.
    target: "bad-target-will-be-rewritten",
    router: ({ signedRequest }) => {
      if (!signedRequest) throw new Error("Must sign request before proxying");
      return `${signedRequest.protocol}//${signedRequest.hostname}`;
    },
    changeOrigin: true,
    // We handle the upstream response ourselves (see gcpResponseHandler).
    selfHandleResponse: true,
    logger,
    on: {
      proxyReq: createOnProxyReqHandler({ pipeline: [finalizeSignedRequest] }),
      proxyRes: createOnProxyResHandler([gcpResponseHandler]),
      error: handleProxyError,
    },
  }),
});
// Preprocessor translating OpenAI-format requests into Anthropic chat format
// for the GCP service, with model reassignment applied after the transform.
const oaiToChatPreprocessor = createPreprocessorMiddleware(
  { inApi: "openai", outApi: "anthropic-chat", service: "gcp" },
  { afterTransform: [maybeReassignModel] }
);
/**
 * Preprocesses an OpenAI-format prompt for the Claude chat completion
 * endpoint. (All requests go through the chat preprocessor; there is no
 * routing to a legacy text-completion endpoint here.)
 */
const preprocessOpenAICompatRequest: RequestHandler = (req, res, next) => {
  oaiToChatPreprocessor(req, res, next);
};
// Router exposing the GCP Claude endpoints. All POST routes are rate-limited
// per-IP and funneled through the shared queue-backed proxy.
const gcpRouter = Router();
// Model listing (no auth/rate limiting; served from the one-minute cache).
gcpRouter.get("/v1/models", handleModelRequest);
// Native Anthropic chat completion endpoint.
gcpRouter.post(
  "/v1/messages",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "gcp" },
    { afterTransform: [maybeReassignModel] }
  ),
  gcpProxy
);
// OpenAI-to-GCP Anthropic compatibility endpoint.
gcpRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  preprocessOpenAICompatRequest,
  gcpProxy
);
/**
 * Tries to deal with:
 * - frontends sending GCP model names even when they want to use the OpenAI-
 *   compatible endpoint
 * - frontends sending Anthropic model names that GCP doesn't recognize
 * - frontends sending OpenAI model names because they expect the proxy to
 *   translate them
 *
 * If the client sends a GCP model ID it will be used verbatim. Otherwise,
 * various strategies are used to try to map a non-GCP model name to a GCP
 * model ID. Mutates `req.body.model` in place.
 */
function maybeReassignModel(req: Request) {
  // Coerce to string up front: clients occasionally send a non-string
  // `model`, which would make the `.startsWith`/`.match` calls below throw.
  const model = String(req.body.model ?? "");
  const lower = model.toLowerCase();
  // If it looks like a GCP model ID (e.g. "claude-sonnet-4-5@20250929"),
  // use it as-is.
  if (model.startsWith("claude-") && model.includes("@")) {
    return;
  }
  // NOTE(review): there is no Opus 4.5 constant, so 4.5 requests are
  // down-mapped to Opus 4.1 — confirm this is intentional.
  if (lower.includes("opus-4.5") || lower.includes("opus-4-5")) {
    req.body.model = GCP_CLAUDE_OPUS_41;
    return;
  }
  if (lower.includes("opus-4.1") || lower.includes("opus-4-1")) {
    req.body.model = GCP_CLAUDE_OPUS_41;
    return;
  }
  if (lower.includes("opus-4")) {
    req.body.model = GCP_CLAUDE_OPUS_4;
    return;
  }
  if (lower.includes("haiku-4.5") || lower.includes("haiku-4-5")) {
    req.body.model = GCP_CLAUDE_HAIKU_45;
    return;
  }
  if (lower.includes("sonnet-4.5") || lower.includes("sonnet-4-5")) {
    req.body.model = GCP_CLAUDE_SONNET_45;
    return;
  }
  if (lower.includes("sonnet-4")) {
    req.body.model = GCP_CLAUDE_SONNET_4;
    return;
  }
  if (lower.includes("3-5") && lower.includes("haiku")) {
    req.body.model = GCP_CLAUDE_35_HAIKU;
    return;
  }
  // Bare family names: map to the newest variant of each family.
  if (lower.includes("opus")) {
    req.body.model = GCP_CLAUDE_OPUS_41;
    return;
  }
  if (lower.includes("haiku")) {
    req.body.model = GCP_CLAUDE_HAIKU_45;
    return;
  }
  if (lower.includes("sonnet")) {
    req.body.model = GCP_CLAUDE_SONNET_45;
    return;
  }
  // OpenAI model names: silently substitute Sonnet 4.5.
  if (
    lower.startsWith("gpt-") ||
    lower.startsWith("o1") ||
    lower.startsWith("o3") ||
    lower.startsWith("o4") ||
    lower === "computer-use-preview"
  ) {
    req.body.model = GCP_CLAUDE_SONNET_45;
    return;
  }
  // Anthropic model names can look like:
  // - claude-v1
  // - claude-2.1
  // - claude-3-5-sonnet-20240620-v1:0
  const pattern =
    /^(claude-)?(instant-)?(v)?(\d+)([.-](\d{1}))?(-\d+k)?(-sonnet-|-opus-|-haiku-)?(\d*)/i;
  const match = model.match(pattern);
  // If there's no match, fall back to Claude Sonnet 4 as it is most likely
  // to be available on GCP.
  if (!match) {
    req.body.model = GCP_CLAUDE_SONNET_4;
    return;
  }
  const major = match[4];
  const minor = match[6];
  // The family group (-sonnet-/-opus-/-haiku-) is optional (e.g. "claude-3"
  // has none); default to "" so the `.includes` checks below don't throw on
  // undefined.
  const name = match[8] ?? "";
  const ver = minor ? `${major}.${minor}` : major;
  switch (ver) {
    case "3":
    case "3.0":
      if (name.includes("opus")) {
        req.body.model = GCP_CLAUDE_OPUS_41;
      } else if (name.includes("haiku")) {
        req.body.model = GCP_CLAUDE_HAIKU_45;
      } else {
        req.body.model = GCP_CLAUDE_SONNET_45;
      }
      return;
    case "3.5":
      req.body.model = name.includes("haiku")
        ? GCP_CLAUDE_35_HAIKU
        : GCP_CLAUDE_SONNET_45;
      return;
  }
  // Fallback for everything else (claude-v1, claude-2.x, ...): Sonnet 4.5.
  req.body.model = GCP_CLAUDE_SONNET_45;
  return;
}
export const gcp = gcpRouter;