diff --git a/.env.example b/.env.example index 1742ada..e7a36d2 100644 --- a/.env.example +++ b/.env.example @@ -23,3 +23,4 @@ OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx # LOG_PROMPTS=false # REJECT_DISALLOWED=false # REJECT_MESSAGE=This content violates /aicg/'s acceptable use policy. +# REJECT_SAMPLE_RATE=0.2 diff --git a/src/config.ts b/src/config.ts index e4c9541..efac00c 100644 --- a/src/config.ts +++ b/src/config.ts @@ -14,6 +14,8 @@ type Config = { maxOutputTokens: number; /** Whether requests containing disallowed characters should be rejected. */ rejectDisallowed?: boolean; + /** Fraction of the text's characters sampled by the disallowed-character check (0 - 1). Higher values are stricter but increase server load. */ + rejectSampleRate?: number; /** Message to return when rejecting requests. */ rejectMessage?: string; /** Logging threshold. */ @@ -29,6 +31,7 @@ export const config: Config = { modelRateLimit: getEnvWithDefault("MODEL_RATE_LIMIT", 2), maxOutputTokens: getEnvWithDefault("MAX_OUTPUT_TOKENS", 256), rejectDisallowed: getEnvWithDefault("REJECT_DISALLOWED", false), + rejectSampleRate: getEnvWithDefault("REJECT_SAMPLE_RATE", 0.2), rejectMessage: getEnvWithDefault( "REJECT_MESSAGE", "This content violates /aicg/'s acceptable use policy." diff --git a/src/proxy/rewriters/language-filter.ts b/src/proxy/rewriters/language-filter.ts index ec9d6e9..1289a5b 100644 --- a/src/proxy/rewriters/language-filter.ts +++ b/src/proxy/rewriters/language-filter.ts @@ -9,7 +9,7 @@ const DISALLOWED_REGEX = // 15k character request ten times a second. So we'll just sample 20% of the // characters and hope that's enough. const containsDisallowedCharacters = (text: string) => { - const sampleSize = Math.floor(text.length * 0.2); + const sampleSize = Math.ceil(text.length * (config.rejectSampleRate || 0.2)); const sample = text .split("") .sort(() => 0.5 - Math.random())