lmao (glm+qwen newest addition)

2026-05-11 00:40:11 -07:00 · 2025-09-23 03:13:37 +02:00
commit b1ba80135a
244 changed files with 41314 additions and 0 deletions
@@ -0,0 +1,39 @@
+import Database from "better-sqlite3";
+import { DATABASE_VERSION, migrateDatabase } from "../src/shared/database";
+import { logger } from "../src/logger";
+import { config } from "../src/config";
+
+const log = logger.child({ module: "scripts/migrate" });
+
+/**
+ * Migrates the SQLite database at `config.sqliteDataPath` to a target schema
+ * version.
+ *
+ * The target version is read from the first CLI argument. When that is
+ * missing, zero, or not numeric, the user is prompted on stdin; an empty or
+ * non-numeric answer (or stdin closing without any input) selects
+ * `DATABASE_VERSION`.
+ */
+async function runMigration() {
+  let targetVersion = Number(process.argv[2]) || undefined;
+
+  if (!targetVersion) {
+    log.info("Enter target version or leave empty to use the latest version.");
+    process.stdin.resume();
+    process.stdin.setEncoding("utf8");
+    const input = await new Promise<string>((resolve) => {
+      // Settle on whichever comes first: the first chunk of input, or EOF.
+      // Without the "end" handler a closed/empty stdin would leave this
+      // promise pending forever and the script would exit without migrating.
+      const onData = (text: unknown) => {
+        process.stdin.removeListener("end", onEnd);
+        resolve((String(text) || "").trim());
+      };
+      const onEnd = () => {
+        process.stdin.removeListener("data", onData);
+        resolve("");
+      };
+      process.stdin.once("data", onData);
+      process.stdin.once("end", onEnd);
+    });
+    process.stdin.pause();
+    // Empty or non-numeric input falls back to the latest known version.
+    targetVersion = Number(input) || DATABASE_VERSION;
+  }
+
+  const db = new Database(config.sqliteDataPath, {
+    // Forward each message from the driver's `verbose` hook to the debug log.
+    verbose: (msg, ...args) => log.debug({ args }, String(msg)),
+  });
+
+  const currentVersion = db.pragma("user_version", { simple: true });
+  log.info({ currentVersion, targetVersion }, "Running migrations.");
+  migrateDatabase(targetVersion, db);
+}
+
+// Entry point: run the migration; on rejection, log the failure and exit
+// with a non-zero status.
+runMigration().then(undefined, (failure) => {
+  log.error(failure, "Migration failed.");
+  process.exit(1);
+});
@@ -0,0 +1,309 @@
+# OAI Reverse Proxy
+
+###
+# @name OpenAI -- Chat Completions
+POST https://api.openai.com/v1/chat/completions
+Authorization: Bearer {{oai-key-1}}
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo",
+  "max_tokens": 30,
+  "stream": false,
+  "messages": [
+    {
+      "role": "user",
+      "content": "This is a test prompt."
+    }
+  ]
+}
+
+###
+# @name OpenAI -- Text Completions
+POST https://api.openai.com/v1/completions
+Authorization: Bearer {{oai-key-1}}
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo-instruct",
+  "max_tokens": 30,
+  "stream": false,
+  "prompt": "This is a test prompt where"
+}
+
+###
+# @name OpenAI -- Create Embedding
+POST https://api.openai.com/v1/embeddings
+Authorization: Bearer {{oai-key-1}}
+Content-Type: application/json
+
+{
+  "model": "text-embedding-ada-002",
+  "input": "This is a test embedding input."
+}
+
+###
+# @name OpenAI -- Get Organizations
+GET https://api.openai.com/v1/organizations
+Authorization: Bearer {{oai-key-1}}
+
+###
+# @name OpenAI -- Get Models
+GET https://api.openai.com/v1/models
+Authorization: Bearer {{oai-key-1}}
+
+###
+# @name Azure OpenAI -- Chat Completions
+POST https://{{azu-resource-name}}.openai.azure.com/openai/deployments/{{azu-deployment-id}}/chat/completions?api-version=2023-09-01-preview
+api-key: {{azu-key-1}}
+Content-Type: application/json
+
+{
+  "max_tokens": 1,
+  "stream": false,
+  "messages": [
+    {
+      "role": "user",
+      "content": "This is a test prompt."
+    }
+  ]
+}
+
+###
+# @name Proxy / OpenAI -- Get Models
+GET {{proxy-host}}/proxy/openai/v1/models
+Authorization: Bearer {{proxy-key}}
+
+###
+# @name Proxy / OpenAI -- Native Chat Completions
+POST {{proxy-host}}/proxy/openai/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-4-1106-preview",
+  "max_tokens": 20,
+  "stream": true,
+  "temperature": 1,
+  "seed": 123,
+  "messages": [
+    {
+      "role": "user",
+      "content": "phrase one"
+    }
+  ]
+}
+
+###
+# @name Proxy / OpenAI -- Native Text Completions
+POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo-instruct",
+  "max_tokens": 20,
+  "temperature": 0,
+  "prompt": "Genshin Impact is a game about",
+  "stream": false
+}
+
+###
+# @name Proxy / OpenAI -- Chat-to-Text API Translation
+# Accepts a chat completion request and reformats it to work with the text completion API. `model` is ignored.
+POST {{proxy-host}}/proxy/openai/turbo-instruct/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-4",
+  "max_tokens": 20,
+  "stream": true,
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is the name of the fourth president of the united states?"
+    },
+    {
+      "role": "assistant",
+      "content": "That would be George Washington."
+    },
+    {
+      "role": "user",
+      "content": "I don't think that's right..."
+    }
+  ]
+}
+
+###
+# @name Proxy / OpenAI -- Create Embedding
+POST {{proxy-host}}/proxy/openai/embeddings
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "text-embedding-ada-002",
+  "input": "This is a test embedding input."
+}
+
+
+###
+# @name Proxy / Anthropic -- Native Completion (old API)
+POST {{proxy-host}}/proxy/anthropic/v1/complete
+Authorization: Bearer {{proxy-key}}
+anthropic-version: 2023-01-01
+Content-Type: application/json
+
+{
+  "model": "claude-v1.3",
+  "max_tokens_to_sample": 20,
+  "temperature": 0.2,
+  "stream": true,
+  "prompt": "What is genshin impact\n\n:Assistant:"
+}
+
+###
+# @name Proxy / Anthropic -- Native Completion (2023-06-01 API)
+POST {{proxy-host}}/proxy/anthropic/v1/complete
+Authorization: Bearer {{proxy-key}}
+anthropic-version: 2023-06-01
+Content-Type: application/json
+
+{
+  "model": "claude-v1.3",
+  "max_tokens_to_sample": 20,
+  "temperature": 0.2,
+  "stream": true,
+  "prompt": "What is genshin impact\n\n:Assistant:"
+}
+
+###
+# @name Proxy / Anthropic -- OpenAI-to-Anthropic API Translation
+POST {{proxy-host}}/proxy/anthropic/v1/chat/completions
+Authorization: Bearer {{proxy-key}}
+#anthropic-version: 2023-06-01
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo",
+  "max_tokens": 20,
+  "stream": false,
+  "temperature": 0,
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is genshin impact"
+    }
+  ]
+}
+
+###
+# @name Proxy / AWS Claude -- Native Completion
+POST {{proxy-host}}/proxy/aws/claude/v1/complete
+Authorization: Bearer {{proxy-key}}
+anthropic-version: 2023-01-01
+Content-Type: application/json
+
+{
+  "model": "claude-v2",
+  "max_tokens_to_sample": 10,
+  "temperature": 0,
+  "stream": true,
+  "prompt": "What is genshin impact\n\n:Assistant:"
+}
+
+###
+# @name Proxy / AWS Claude -- OpenAI-to-Anthropic API Translation
+POST {{proxy-host}}/proxy/aws/claude/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo",
+  "max_tokens": 50,
+  "stream": true,
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is genshin impact?"
+    }
+  ]
+}
+
+###
+# @name Proxy / GCP Claude -- Native Completion
+POST {{proxy-host}}/proxy/gcp/claude/v1/complete
+Authorization: Bearer {{proxy-key}}
+anthropic-version: 2023-01-01
+Content-Type: application/json
+
+{
+  "model": "claude-v2",
+  "max_tokens_to_sample": 10,
+  "temperature": 0,
+  "stream": true,
+  "prompt": "What is genshin impact\n\n:Assistant:"
+}
+
+###
+# @name Proxy / GCP Claude -- OpenAI-to-Anthropic API Translation
+POST {{proxy-host}}/proxy/gcp/claude/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-3.5-turbo",
+  "max_tokens": 50,
+  "stream": true,
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is genshin impact?"
+    }
+  ]
+}
+
+###
+# @name Proxy / Azure OpenAI -- Native Chat Completions
+POST {{proxy-host}}/proxy/azure/openai/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-4",
+  "max_tokens": 20,
+  "stream": true,
+  "temperature": 1,
+  "seed": 2,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hi what is the name of the fourth president of the united states?"
+    },
+    {
+      "role": "assistant",
+      "content": "That would be George Washington."
+    },
+    {
+      "role": "user",
+      "content": "That's not right."
+    }
+  ]
+}
+
+###
+# @name Proxy / Google AI -- OpenAI-to-Google AI API Translation
+POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
+Authorization: Bearer {{proxy-key}}
+Content-Type: application/json
+
+{
+  "model": "gpt-4",
+  "max_tokens": 42,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hi what is the name of the fourth president of the united states?"
+    }
+  ]
+}
@@ -0,0 +1,102 @@
+import Database from "better-sqlite3";
+import { v4 as uuidv4 } from "uuid";
+import { config } from "../src/config";
+
+/**
+ * Builds a random dotted-quad IPv4 address string.
+ *
+ * @returns four dot-separated octets, each drawn from 0-255 inclusive.
+ */
+function generateRandomIP() {
+  // Scale by 256 (not 255) so that 255 is a reachable octet value.
+  const randomOctet = () => Math.floor(Math.random() * 256);
+  return Array.from({ length: 4 }, randomOctet).join(".");
+}
+
+/**
+ * Picks a random instant between 90 calendar days ago and now.
+ *
+ * @returns the chosen instant as an ISO 8601 string.
+ */
+function generateRandomDate() {
+  const now = new Date();
+  const windowStart = new Date(now);
+  windowStart.setDate(now.getDate() - 90);
+
+  const startMs = windowStart.getTime();
+  const spanMs = now.getTime() - startMs;
+  return new Date(startMs + Math.random() * spanMs).toISOString();
+}
+
+/**
+ * Produces a 64-character lowercase hex string shaped like a SHA-256 digest.
+ * The characters are random; nothing is actually hashed.
+ */
+function generateMockSHA256() {
+  const alphabet = 'abcdef0123456789';
+  return Array.from(
+    { length: 64 },
+    () => alphabet[Math.floor(Math.random() * alphabet.length)]
+  ).join('');
+}
+
+/** Returns one entry, chosen uniformly at random, from a fixed list of model family names. */
+function getRandomModelFamily() {
+  const families = [
+    "turbo",
+    "gpt4",
+    "gpt4-32k",
+    "gpt4-turbo",
+    "claude",
+    "claude-opus",
+    "gemini-pro",
+    "mistral-tiny",
+    "mistral-small",
+    "mistral-medium",
+    "mistral-large",
+    "aws-claude",
+    "aws-claude-opus",
+    "gcp-claude",
+    "gcp-claude-opus",
+    "azure-turbo",
+    "azure-gpt4",
+    "azure-gpt4-32k",
+    "azure-gpt4-turbo",
+    "dall-e",
+    "azure-dall-e",
+  ];
+  const pick = Math.floor(Math.random() * families.length);
+  return families[pick];
+}
+
+// Seeds the `events` table of the database at `config.sqliteDataPath` with
+// randomly generated mock rows.
+(async () => {
+  const db = new Database(config.sqliteDataPath);
+  try {
+    const numRows = 100;
+    const insertStatement = db.prepare(`
+  INSERT INTO events (type, ip, date, model, family, hashes, userToken, inputTokens, outputTokens)
+  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+`);
+
+    // A small fixed pool of user tokens so the rows are spread over ten users.
+    const users = Array.from({ length: 10 }, () => uuidv4());
+    const getRandomUser = () =>
+      users[Math.floor(Math.random() * users.length)];
+
+    // All inserts run inside a single transaction.
+    const transaction = db.transaction(() => {
+      for (let i = 0; i < numRows; i++) {
+        // Draw the family once so `model` and `family` agree within a row.
+        const family = getRandomModelFamily();
+        insertStatement.run(
+          "chat_completion",
+          generateRandomIP(),
+          generateRandomDate(),
+          `${family}-${Math.floor(Math.random() * 100)}`,
+          family,
+          // Zero to nine comma-separated mock hashes.
+          Array.from(
+            { length: Math.floor(Math.random() * 10) },
+            generateMockSHA256
+          ).join(","),
+          getRandomUser(),
+          Math.floor(Math.random() * 500),
+          Math.floor(Math.random() * 6000)
+        );
+      }
+    });
+
+    transaction();
+
+    console.log(`Inserted ${numRows} rows into the events table.`);
+  } finally {
+    // Release the handle even when preparing or inserting throws.
+    db.close();
+  }
+})();
@@ -0,0 +1,118 @@
+// uses the aws sdk to sign a request, then uses axios to send it to the bedrock REST API manually
+import axios from "axios";
+import { Sha256 } from "@aws-crypto/sha256-js";
+import { SignatureV4 } from "@smithy/signature-v4";
+import { HttpRequest } from "@smithy/protocol-http";
+
+const AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID!;
+const AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY!;
+
+// Copied from amazon bedrock docs
+
+// List models
+// ListFoundationModels
+// Service: Amazon Bedrock
+// List of Bedrock foundation models that you can use. For more information, see Foundation models in the
+// Bedrock User Guide.
+//   Request Syntax
+// GET /foundation-models?
+//   byCustomizationType=byCustomizationType&byInferenceType=byInferenceType&byOutputModality=byOutputModality&byProvider=byProvider
+//   HTTP/1.1
+// URI Request Parameters
+// The request uses the following URI parameters.
+// byCustomizationType (p. 38)
+// List by customization type.
+//   Valid Values: FINE_TUNING
+// byInferenceType (p. 38)
+// List by inference type.
+//   Valid Values: ON_DEMAND | PROVISIONED
+// byOutputModality (p. 38)
+// List by output modality type.
+//   Valid Values: TEXT | IMAGE | EMBEDDING
+// byProvider (p. 38)
+// A Bedrock model provider.
+//   Pattern: ^[a-z0-9-]{1,63}$
+// Request Body
+// The request does not have a request body
+
+// Run inference on a text model
+// Send an invoke request to run inference on a Titan Text G1 - Express model. We set the accept
+// parameter to accept any content type in the response.
+//   POST https://bedrock.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke
+//   -H accept: */*
+// -H content-type: application/json
+// Payload
+// {"inputText": "Hello world"}
+// Example response
+// Response for the above request.
+// -H content-type: application/json
+// Payload
+// <the model response>
+
+const AMZ_REGION = "us-east-1";
+const AMZ_HOST = "invoke-bedrock.us-east-1.amazonaws.com";
+
+/**
+ * Signs a GET request for `/foundation-models` on `AMZ_HOST`, sends it with
+ * axios, and prints the response body.
+ */
+async function listModels() {
+  const unsigned = new HttpRequest({
+    method: "GET",
+    protocol: "https:",
+    hostname: AMZ_HOST,
+    path: "/foundation-models",
+    headers: { Host: AMZ_HOST },
+  });
+
+  const { hostname, path, headers } = await signRequest(unsigned);
+  const url = `https://${hostname}${path}`;
+  const { data } = await axios.get(url, { headers });
+  console.log(data);
+}
+
+/**
+ * Signs a POST to `/model/anthropic.claude-v1/invoke` on `AMZ_HOST` carrying
+ * a short fixed prompt, sends it with axios, and prints the response status,
+ * headers, body, and final URL.
+ */
+async function invokeModel() {
+  const model = "anthropic.claude-v1";
+  const payload = JSON.stringify({
+    temperature: 0.5,
+    prompt: "\n\nHuman:Hello world\n\nAssistant:",
+    max_tokens_to_sample: 10,
+  });
+  const httpRequest = new HttpRequest({
+    method: "POST",
+    protocol: "https:",
+    hostname: AMZ_HOST,
+    path: `/model/${model}/invoke`,
+    headers: {
+      Host: AMZ_HOST,
+      accept: "*/*",
+      "content-type": "application/json",
+    },
+    body: payload,
+  });
+  console.log("httpRequest", httpRequest);
+
+  const { hostname, path, body, headers } = await signRequest(httpRequest);
+  const response = await axios.post(`https://${hostname}${path}`, body, {
+    headers,
+  });
+  for (const part of [response.status, response.headers, response.data]) {
+    console.log(part);
+  }
+  console.log("full url", response.request.res.responseUrl);
+}
+
+/**
+ * Signs `request` with AWS Signature Version 4 for the `bedrock` service in
+ * `AMZ_REGION`, using the access key pair read from the environment.
+ *
+ * @param request the unsigned HTTP request
+ * @returns the result of `SignatureV4.sign` for that request
+ * @throws Error when either credential environment variable is unset
+ */
+async function signRequest(request: HttpRequest) {
+  // The module-level `!` assertions only silence the compiler; check at
+  // runtime so a missing variable gives a clear error, not an opaque
+  // signing failure.
+  if (!AWS_ACCESS_KEY_ID || !AWS_SECRET_ACCESS_KEY) {
+    throw new Error(
+      "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must be set in the environment."
+    );
+  }
+
+  const signer = new SignatureV4({
+    sha256: Sha256,
+    credentials: {
+      accessKeyId: AWS_ACCESS_KEY_ID,
+      secretAccessKey: AWS_SECRET_ACCESS_KEY,
+    },
+    region: AMZ_REGION,
+    service: "bedrock",
+  });
+  return await signer.sign(request, { signingDate: new Date() });
+}
+
+// listModels();
+// invokeModel();
@@ -0,0 +1,45 @@
+const axios = require("axios");
+
+const concurrentRequests = 75;
+const headers = {
+  Authorization: "Bearer test",
+  "Content-Type": "application/json",
+};
+
+const payload = {
+  model: "gpt-4",
+  max_tokens: 1,
+  stream: false,
+  messages: [{ role: "user", content: "Hi" }],
+};
+
+const makeRequest = async (i) => {
+  try {
+    const response = await axios.post(
+      "http://localhost:7860/proxy/google-ai/v1/chat/completions",
+      payload,
+      { headers }
+    );
+    console.log(
+      `Req ${i} finished with status code ${response.status} and response:`,
+      response.data
+    );
+  } catch (error) {
+    const msg = error.response
+    console.error(`Error in req ${i}:`, error.message, msg || "");
+  }
+};
+
+const executeRequestsConcurrently = () => {
+  const promises = [];
+  for (let i = 1; i <= concurrentRequests; i++) {
+    console.log(`Starting request ${i}`);
+    promises.push(makeRequest(i));
+  }
+
+  Promise.all(promises).then(() => {
+    console.log("All requests finished");
+  });
+};
+
+executeRequestsConcurrently();
@@ -0,0 +1,53 @@
+const axios = require("axios");
+
+function randomInteger(max) {
+  return Math.floor(Math.random() * max + 1);
+}
+
+/**
+ * Fires 10 chat-completion requests at the local proxy in parallel, each with
+ * a random `max_tokens` and a random `X-Forwarded-For` address, and reports
+ * whether each reply is exactly the expected sentence.
+ *
+ * Each request is labelled in the logs by its random `maxTokens` value.
+ * Request failures are logged, not rethrown.
+ */
+async function testQueue() {
+  const requests = Array(10).fill(undefined).map(async function() {
+    const maxTokens = randomInteger(2000);
+
+    const headers = {
+      "Authorization": "Bearer test",
+      "Content-Type": "application/json",
+      "X-Forwarded-For": `${randomInteger(255)}.${randomInteger(255)}.${randomInteger(255)}.${randomInteger(255)}`,
+    };
+
+    const payload = {
+      model: "gpt-4o-mini-2024-07-18",
+      max_tokens: 20 + maxTokens,
+      stream: false,
+      messages: [{role: "user", content: "You are being benchmarked regarding your reliability at outputting exact, machine-comprehensible data. Output the sentence \"The quick brown fox jumps over the lazy dog.\" Do not precede it with quotemarks or any form of preamble, and do not output anything after the sentence."}],
+      temperature: 0,
+    };
+
+    try {
+      const response = await axios.post(
+        "http://localhost:7860/proxy/openai/v1/chat/completions",
+        payload,
+        { headers }
+      );
+
+      // axios rejects non-2xx responses by default, so this branch only sees
+      // 2xx statuses other than 200.
+      if (response.status !== 200) {
+        console.error(`Request ${maxTokens} finished with status code ${response.status} and response`, response.data);
+        return;
+      }
+
+      const content = response.data.choices[0].message.content;
+
+      console.log(
+        `Request ${maxTokens} `,
+        content === "The quick brown fox jumps over the lazy dog." ? "OK" : `mangled: ${content}`
+      );
+    } catch (error) {
+      const msg = error.response;
+      console.error(`Error in req ${maxTokens}:`, error.message, msg || "");
+    }
+  });
+
+  await Promise.all(requests);
+  console.log("All requests finished");
+}
+
+testQueue();