partial googleai fixes; adds jsonl file backend for promptlogger stolen from fiz

nai-degen
2024-04-23 03:43:38 -05:00
parent 0a27345c29
commit 32b623d6bc
7 changed files with 150 additions and 8 deletions
+14 -1
@@ -149,7 +149,9 @@ type Config = {
   /** Whether prompts and responses should be logged to persistent storage. */
   promptLogging?: boolean;
   /** Which prompt logging backend to use. */
-  promptLoggingBackend?: "google_sheets";
+  promptLoggingBackend?: "google_sheets" | "file";
+  /** Prefix for prompt logging files when using the file backend. */
+  promptLoggingFilePrefix?: string;
   /** Base64-encoded Google Sheets API key. */
   googleSheetsKey?: string;
   /** Google Sheets spreadsheet ID. */
@@ -329,6 +331,10 @@ export const config: Config = {
   allowAwsLogging: getEnvWithDefault("ALLOW_AWS_LOGGING", false),
   promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
   promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
+  promptLoggingFilePrefix: getEnvWithDefault(
+    "PROMPT_LOGGING_FILE_PREFIX",
+    "prompt-logs"
+  ),
   googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
   googleSheetsSpreadsheetId: getEnvWithDefault(
     "GOOGLE_SHEETS_SPREADSHEET_ID",
@@ -387,6 +393,12 @@ export async function assertConfigIsValid() {
     );
   }
+  if (config.promptLogging && !config.promptLoggingBackend) {
+    throw new Error(
+      "Prompt logging is enabled but no backend is configured. Set PROMPT_LOGGING_BACKEND to 'google_sheets' or 'file'."
+    );
+  }
   if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) {
     throw new Error(
       `Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.`
@@ -463,6 +475,7 @@ export const OMITTED_KEYS = [
"rejectPhrases",
"rejectMessage",
"showTokenCosts",
"promptLoggingFilePrefix",
"googleSheetsKey",
"firebaseKey",
"firebaseRtdbUrl",
+24 -2
@@ -11,7 +11,7 @@ import { ProxyResHandlerWithBody } from ".";
 import { assertNever } from "../../../shared/utils";
 import {
   AnthropicChatMessage,
-  flattenAnthropicMessages,
+  flattenAnthropicMessages, GoogleAIChatMessage,
   MistralAIChatMessage,
   OpenAIChatMessage,
 } from "../../../shared/api-schemas";
@@ -62,6 +62,7 @@ const getPromptForRequest = (
 ):
   | string
   | OpenAIChatMessage[]
+  | { contents: GoogleAIChatMessage[] }
   | { system: string; messages: AnthropicChatMessage[] }
   | MistralAIChatMessage[]
   | OaiImageResult => {
@@ -87,7 +88,7 @@ const getPromptForRequest = (
case "anthropic-text":
return req.body.prompt;
case "google-ai":
return req.body.prompt.text;
return { contents: req.body.contents };
default:
assertNever(req.outboundApi);
}
@@ -98,6 +99,7 @@ const flattenMessages = (
     | string
     | OaiImageResult
     | OpenAIChatMessage[]
+    | { contents: GoogleAIChatMessage[] }
     | { system: string; messages: AnthropicChatMessage[] }
     | MistralAIChatMessage[]
 ): string => {
@@ -108,6 +110,16 @@ const flattenMessages = (
     const { system, messages } = val;
     return `System: ${system}\n\n${flattenAnthropicMessages(messages)}`;
   }
+  if (isGoogleAIChatPrompt(val)) {
+    return val.contents
+      .map(({ parts, role }) => {
+        const text = parts
+          .map((p) => p.text)
+          .join("\n");
+        return `${role}: ${text}`;
+      })
+      .join("\n");
+  }
   if (Array.isArray(val)) {
     return val
       .map(({ content, role }) => {
@@ -128,6 +140,16 @@ const flattenMessages = (
   return val.prompt.trim();
 };
+
+function isGoogleAIChatPrompt(
+  val: unknown
+): val is { contents: GoogleAIChatMessage[] } {
+  return (
+    typeof val === "object" &&
+    val !== null &&
+    "contents" in val
+  );
+}
 function isAnthropicChatPrompt(
   val: unknown
 ): val is { system: string; messages: AnthropicChatMessage[] } {
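For reference, a minimal sketch of what the new Google AI branch flattens to, assuming GoogleAIChatMessage has the { role, parts: [{ text }] } shape implied by the code above (the sample messages are hypothetical):

  const prompt = {
    contents: [
      { role: "user", parts: [{ text: "Hello" }] },
      { role: "model", parts: [{ text: "Hi! How can I help?" }] },
    ],
  };
  // flattenMessages(prompt) yields:
  // "user: Hello\nmodel: Hi! How can I help?"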
@@ -116,7 +116,7 @@ export class SSEStreamAdapter extends Transform {
     try {
       const hasParts = candidates[0].content?.parts?.length > 0;
       if (hasParts) {
-        return `data: ${JSON.stringify(data)}`;
+        return `data: ${JSON.stringify(data.value ?? data)}\n`;
       } else {
         this.log.error({ event: data }, "Received bad Google AI event");
         return `data: ${buildSpoofedSSE({
@@ -0,0 +1,86 @@
+// stolen from https://gitgud.io/fiz1/oai-reverse-proxy
+import { promises as fs } from "fs";
+import * as path from "path";
+import { USER_ASSETS_DIR, config } from "../../../config";
+import { logger } from "../../../logger";
+import { LogBackend, PromptLogEntry } from "../index";
+import { glob } from "glob";
+
+const MAX_FILE_SIZE = 25 * 1024 * 1024;
+
+let currentFileNumber = 0;
+let currentFilePath = "";
+let currentFileSize = 0;
+
+export { currentFileNumber };
+
+export const fileBackend: LogBackend = {
+  init: async (_onStop: () => void) => {
+    try {
+      await createNewLogFile();
+    } catch (error) {
+      logger.error("Error initializing file backend", error);
+      throw error;
+    }
+    const files = glob.sync(
+      path.join(USER_ASSETS_DIR, `${config.promptLoggingFilePrefix}*.jsonl`),
+      { windowsPathsNoEscape: true }
+    );
+    const sorted = files.sort((a, b) => {
+      const aNum = parseInt(path.basename(a).replace(/[^0-9]/g, ""), 10);
+      const bNum = parseInt(path.basename(b).replace(/[^0-9]/g, ""), 10);
+      return aNum - bNum;
+    });
+    if (sorted.length > 0) {
+      const latestFile = sorted[sorted.length - 1];
+      const stats = await fs.stat(latestFile);
+      currentFileNumber = parseInt(
+        path.basename(latestFile).replace(/[^0-9]/g, ""),
+        10
+      );
+      currentFilePath = latestFile;
+      currentFileSize = stats.size;
+    }
+    logger.info(
+      { currentFileNumber, currentFilePath, currentFileSize },
+      "File backend initialized"
+    );
+  },
+  appendBatch: async (batch: PromptLogEntry[]) => {
+    try {
+      if (currentFileSize > MAX_FILE_SIZE) {
+        await createNewLogFile();
+      }
+      const batchString =
+        batch.map((entry) => JSON.stringify(entry)).join("\n") + "\n";
+      const batchSizeBytes = Buffer.byteLength(batchString);
+      const batchLines = batch.length;
+      logger.debug(
+        { batchLines, batchSizeBytes, currentFileSize, file: currentFilePath },
+        "Appending batch to file"
+      );
+      await fs.appendFile(currentFilePath, batchString);
+      currentFileSize += Buffer.byteLength(batchString);
+    } catch (error) {
+      logger.error("Error appending batch to file", error);
+      throw error;
+    }
+  },
+};
+
+async function createNewLogFile() {
+  currentFileNumber++;
+  currentFilePath = path.join(
+    USER_ASSETS_DIR,
+    `${config.promptLoggingFilePrefix}${currentFileNumber}.jsonl`
+  );
+  currentFileSize = 0;
+  await fs.writeFile(currentFilePath, "");
+  logger.info(`Created new log file: ${currentFilePath}`);
+}
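Entries are written as newline-delimited JSON, and files roll over once they exceed 25 MiB (prompt-logs1.jsonl, prompt-logs2.jsonl, ...), so the output is easy to consume offline. A rough read-back sketch under those assumptions; readAllPromptLogs is not part of the commit:

  import { promises as fs } from "fs";
  import * as path from "path";

  // Read every rotated log file in numeric order, parsing one entry per line.
  async function readAllPromptLogs(dir: string, prefix = "prompt-logs") {
    const files = (await fs.readdir(dir))
      .filter((f) => f.startsWith(prefix) && f.endsWith(".jsonl"))
      .sort(
        (a, b) =>
          parseInt(a.replace(/[^0-9]/g, ""), 10) -
          parseInt(b.replace(/[^0-9]/g, ""), 10)
      );
    const entries: unknown[] = [];
    for (const file of files) {
      const text = await fs.readFile(path.join(dir, file), "utf8");
      for (const line of text.split("\n")) {
        if (line.trim()) entries.push(JSON.parse(line));
      }
    }
    return entries;
  }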
@@ -1 +1,2 @@
 export * as sheets from "./sheets";
+export { fileBackend as file } from "./file";
+5
@@ -17,4 +17,9 @@ export interface PromptLogEntry {
   // TODO: temperature, top_p, top_k, etc.
 }
+
+export interface LogBackend {
+  init: (onStop: () => void) => Promise<void>;
+  appendBatch: (batch: PromptLogEntry[]) => Promise<void>;
+}
 export * as logQueue from "./log-queue";
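Any additional backend only has to satisfy this two-method contract. A minimal sketch of a hypothetical backend (consoleBackend is not part of the commit):

  import { LogBackend, PromptLogEntry } from ".";

  // Prints each batch to stdout; shows the expected init/appendBatch shape.
  export const consoleBackend: LogBackend = {
    init: async (_onStop: () => void) => {
      // nothing to set up
    },
    appendBatch: async (batch: PromptLogEntry[]) => {
      for (const entry of batch) console.log(JSON.stringify(entry));
    },
  };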
+19 -4
@@ -2,8 +2,10 @@
  * logging backend. */
 import { logger } from "../../logger";
-import { PromptLogEntry } from ".";
-import { sheets } from "./backends";
+import { LogBackend, PromptLogEntry } from ".";
+import { sheets, file } from "./backends";
+import { config } from "../../config";
+import { assertNever } from "../utils";
 
 const FLUSH_INTERVAL = 1000 * 10;
 const MAX_BATCH_SIZE = 25;
@@ -15,6 +17,7 @@ let started = false;
 let timeoutId: NodeJS.Timeout | null = null;
 let retrying = false;
 let consecutiveFailedBatches = 0;
+let backend: LogBackend;
 
 export const enqueue = (payload: PromptLogEntry) => {
   if (!started) {
@@ -34,7 +37,7 @@ export const flush = async () => {
   const nextBatch = queue.splice(0, batchSize);
   log.info({ size: nextBatch.length }, "Submitting new batch.");
   try {
-    await sheets.appendBatch(nextBatch);
+    await backend.appendBatch(nextBatch);
     retrying = false;
     consecutiveFailedBatches = 0;
   } catch (e: any) {
@@ -64,8 +67,20 @@ export const flush = async () => {
 };
 
 export const start = async () => {
+  const type = config.promptLoggingBackend!;
   try {
-    await sheets.init(() => stop());
+    switch (type) {
+      case "google_sheets":
+        backend = sheets;
+        await sheets.init(() => stop());
+        break;
+      case "file":
+        backend = file;
+        await file.init(() => stop());
+        break;
+      default:
+        assertNever(type)
+    }
     log.info("Logging backend initialized.");
     started = true;
   } catch (e) {