diff --git a/src/modules/llms/server/anthropic/anthropic.access.ts b/src/modules/llms/server/anthropic/anthropic.access.ts new file mode 100644 index 000000000..2f17b75c6 --- /dev/null +++ b/src/modules/llms/server/anthropic/anthropic.access.ts @@ -0,0 +1,166 @@ +/** + * Isomorphic Anthropic API access - works on both server and client. + * + * This module only imports zod for schema definition and provides access logic + * that works identically on server and client environments. + */ + +import * as z from 'zod/v4'; +import { TRPCError } from '@trpc/server'; + +import { env } from '~/server/env.server'; + +import { llmsFixupHost } from '../openai/openai.access'; + + +// configuration +const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com'; +const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com'; + +const DEFAULT_ANTHROPIC_HEADERS = { + // Latest version hasn't changed (as of Feb 2025) + 'anthropic-version': '2023-06-01', + + // Enable CORS for browsers - we don't use this on server + // 'anthropic-dangerous-direct-browser-access': 'true', + + // Used for instance by Claude Code - shall we set it + // 'x-app': 'big-agi', +} as const; + +const DEFAULT_ANTHROPIC_BETA_FEATURES: string[] = [ + + // NOTE: undocumented: I wonder what this is for + // 'claude-code-20250219', + + // NOTE: disabled for now, as we don't have tested side-effects for this feature yet + // 'token-efficient-tools-2025-02-19', // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use + + /** + * to use the prompt caching feature; adds to any API invocation: + * - message_start.message.usage.cache_creation_input_tokens: number + * - message_start.message.usage.cache_read_input_tokens: number + */ + 'prompt-caching-2024-07-31', + + /** + * Enables model_context_window_exceeded stop reason for models earlier than Sonnet 4.5 + * (Sonnet 4.5+ have this by default). This allows requesting max tokens without calculating + * input size, and the API will return as much as possible within the context window. + * https://docs.claude.com/en/api/handling-stop-reasons#model-context-window-exceeded + */ + // 'model-context-window-exceeded-2025-08-26', + + // now default + // 'messages-2023-12-15' +] as const; + +const PER_MODEL_BETA_FEATURES: { [modelId: string]: string[] } = { + 'claude-3-7-sonnet-20250219': [ + + /** enables long output for the 3.7 Sonnet model */ + 'output-128k-2025-02-19', + + /** computer Tools for Sonnet 3.7 [computer_20250124, text_editor_20250124, bash_20250124] */ + 'computer-use-2025-01-24', + + ] as const, +} as const; + + +// --- Anthropic Access --- + +export type AnthropicHeaderOptions = { + modelIdForBetaFeatures?: string; + vndAntWebFetch?: boolean; + vndAnt1MContext?: boolean; + enableSkills?: boolean; + enableCodeExecution?: boolean; + clientSideFetch?: boolean; // whether the request will be made from client-side (browser) - adds CORS header +}; + +export type AnthropicAccessSchema = z.infer; +export const anthropicAccessSchema = z.object({ + dialect: z.literal('anthropic'), + clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated + anthropicKey: z.string().trim(), + anthropicHost: z.string().trim().nullable(), + heliconeKey: z.string().trim().nullable(), +}); + +export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions): { headers: HeadersInit, url: string } { + // API key + const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || ''; + + // break for the missing key only on the default host + if (!anthropicKey && !(access.anthropicHost || env.ANTHROPIC_API_HOST)) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); + + // API host + let anthropicHost = llmsFixupHost(access.anthropicHost || env.ANTHROPIC_API_HOST || DEFAULT_ANTHROPIC_HOST, apiPath); + + // Helicone for Anthropic + // https://docs.helicone.ai/getting-started/integration-method/anthropic + const heliKey = access.heliconeKey || env.HELICONE_API_KEY || false; + if (heliKey) { + if (!anthropicHost.includes(DEFAULT_ANTHROPIC_HOST) && !anthropicHost.includes(DEFAULT_HELICONE_ANTHROPIC_HOST)) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'The Helicone Anthropic Key has been provided, but the host is set to custom. Please fix it in the Models Setup page.' }); + anthropicHost = `https://${DEFAULT_HELICONE_ANTHROPIC_HOST}`; + } + + // [CSF] add CORS-allow header if client-side fetch + if (access.clientSideFetch) + options = { ...options, clientSideFetch: true }; + + return { + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json', + ..._anthropicHeaders(options), + 'X-API-Key': anthropicKey, + ...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }), + }, + url: anthropicHost + apiPath, + }; +} + + +function _anthropicHeaders(options?: AnthropicHeaderOptions): Record { + + // accumulate the beta features + const betaFeatures = [...DEFAULT_ANTHROPIC_BETA_FEATURES]; + if (options?.modelIdForBetaFeatures) { + // string search (.includes) within the keys, to be more resilient to modelId changes/prefixing + for (const [key, value] of Object.entries(PER_MODEL_BETA_FEATURES)) + if (key.includes(options.modelIdForBetaFeatures)) + betaFeatures.push(...value); + } + + // Add beta feature for web-fetch if enabled + // Note: web-fetch-2025-09-10 is documented in official API docs but not yet in TypeScript SDK types + if (options?.vndAntWebFetch) + betaFeatures.push('web-fetch-2025-09-10'); + + // Add beta feature for 1M context window if enabled + if (options?.vndAnt1MContext) + betaFeatures.push('context-1m-2025-08-07'); + + // Add beta features for Skills API + if (options?.enableSkills) { + betaFeatures.push('skills-2025-10-02'); + betaFeatures.push('files-api-2025-04-14'); // For file downloads + } + + // Add beta feature for code execution (required for Skills) + if (options?.enableCodeExecution || options?.enableSkills) { + betaFeatures.push('code-execution-2025-08-25'); + } + + return { + ...DEFAULT_ANTHROPIC_HEADERS, + // CORS: allow browser access to Anthropic API servers + ...(options?.clientSideFetch ? { 'anthropic-dangerous-direct-browser-access': 'true' } : {}), + // Beta features + ...(betaFeatures.length ? { 'anthropic-beta': betaFeatures.join(',') } : {}), + }; +} diff --git a/src/modules/llms/server/anthropic/anthropic.router.ts b/src/modules/llms/server/anthropic/anthropic.router.ts index 3d230d30b..6679873bd 100644 --- a/src/modules/llms/server/anthropic/anthropic.router.ts +++ b/src/modules/llms/server/anthropic/anthropic.router.ts @@ -1,115 +1,12 @@ import * as z from 'zod/v4'; -import { TRPCError } from '@trpc/server'; import { createTRPCRouter, edgeProcedure } from '~/server/trpc/trpc.server'; -import { env } from '~/server/env.server'; import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; import { ListModelsResponse_schema } from '../llm.server.types'; -import { fixupHost } from '../openai/openai.router'; import { listModelsRunDispatch } from '../listModels.dispatch'; - -// configuration and defaults -const DEFAULT_ANTHROPIC_HOST = 'api.anthropic.com'; -const DEFAULT_HELICONE_ANTHROPIC_HOST = 'anthropic.hconeai.com'; - -const DEFAULT_ANTHROPIC_HEADERS = { - // Latest version hasn't changed (as of Feb 2025) - 'anthropic-version': '2023-06-01', - - // Enable CORS for browsers - we don't use this - // 'anthropic-dangerous-direct-browser-access': 'true', - - // Used for instance by Claude Code - shall we set it - // 'x-app': 'big-agi', -} as const; - -const DEFAULT_ANTHROPIC_BETA_FEATURES: string[] = [ - - // NOTE: undocumented: I wonder what this is for - // 'claude-code-20250219', - - // NOTE: disabled for now, as we don't have tested side-effects for this feature yet - // 'token-efficient-tools-2025-02-19', // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use - - /** - * to use the prompt caching feature; adds to any API invocation: - * - message_start.message.usage.cache_creation_input_tokens: number - * - message_start.message.usage.cache_read_input_tokens: number - */ - 'prompt-caching-2024-07-31', - - /** - * Enables model_context_window_exceeded stop reason for models earlier than Sonnet 4.5 - * (Sonnet 4.5+ have this by default). This allows requesting max tokens without calculating - * input size, and the API will return as much as possible within the context window. - * https://docs.claude.com/en/api/handling-stop-reasons#model-context-window-exceeded - */ - // 'model-context-window-exceeded-2025-08-26', - - // now default - // 'messages-2023-12-15' -] as const; - -const PER_MODEL_BETA_FEATURES: { [modelId: string]: string[] } = { - 'claude-3-7-sonnet-20250219': [ - - /** enables long output for the 3.7 Sonnet model */ - 'output-128k-2025-02-19', - - /** computer Tools for Sonnet 3.7 [computer_20250124, text_editor_20250124, bash_20250124] */ - 'computer-use-2025-01-24', - - ] as const, -} as const; - -type AnthropicHeaderOptions = { - modelIdForBetaFeatures?: string; - vndAntWebFetch?: boolean; - vndAnt1MContext?: boolean; - enableSkills?: boolean; - enableCodeExecution?: boolean; -}; - -function _anthropicHeaders(options?: AnthropicHeaderOptions): Record { - - // accumulate the beta features - const betaFeatures = [...DEFAULT_ANTHROPIC_BETA_FEATURES]; - if (options?.modelIdForBetaFeatures) { - // string search (.includes) within the keys, to be more resilient to modelId changes/prefixing - for (const [key, value] of Object.entries(PER_MODEL_BETA_FEATURES)) - if (key.includes(options.modelIdForBetaFeatures)) - betaFeatures.push(...value); - } - - // Add beta feature for web-fetch if enabled - // Note: web-fetch-2025-09-10 is documented in official API docs but not yet in TypeScript SDK types - if (options?.vndAntWebFetch) - betaFeatures.push('web-fetch-2025-09-10'); - - // Add beta feature for 1M context window if enabled - if (options?.vndAnt1MContext) - betaFeatures.push('context-1m-2025-08-07'); - - // Add beta features for Skills API - if (options?.enableSkills) { - betaFeatures.push('skills-2025-10-02'); - betaFeatures.push('files-api-2025-04-14'); // For file downloads - } - - // Add beta feature for code execution (required for Skills) - if (options?.enableCodeExecution || options?.enableSkills) { - betaFeatures.push('code-execution-2025-08-25'); - } - - // Note: web-search is now GA and no longer requires a beta header - - return { - ...DEFAULT_ANTHROPIC_HEADERS, - 'anthropic-beta': betaFeatures.join(','), - }; -} +import { anthropicAccess, anthropicAccessSchema, AnthropicAccessSchema, AnthropicHeaderOptions } from './anthropic.access'; // Mappers @@ -119,58 +16,9 @@ async function anthropicGETOrThrow(access: AnthropicAccessS return await fetchJsonOrTRPCThrow({ url, headers, name: 'Anthropic', signal }); } -// async function anthropicPOST(access: AnthropicAccessSchema, apiPath: string, body: TPostBody, options?: AnthropicHeaderOptions, signal?: AbortSignal): Promise { -// const { headers, url } = anthropicAccess(access, apiPath, options); -// return await fetchJsonOrTRPCThrow({ url, method: 'POST', headers, body, name: 'Anthropic', signal }); -// } - -export function anthropicAccess(access: AnthropicAccessSchema, apiPath: string, options?: AnthropicHeaderOptions): { headers: HeadersInit, url: string } { - // API key - const anthropicKey = access.anthropicKey || env.ANTHROPIC_API_KEY || ''; - - // break for the missing key only on the default host - if (!anthropicKey && !(access.anthropicHost || env.ANTHROPIC_API_HOST)) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Anthropic API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); - - // API host - let anthropicHost = fixupHost(access.anthropicHost || env.ANTHROPIC_API_HOST || DEFAULT_ANTHROPIC_HOST, apiPath); - - // Helicone for Anthropic - // https://docs.helicone.ai/getting-started/integration-method/anthropic - const heliKey = access.heliconeKey || env.HELICONE_API_KEY || false; - if (heliKey) { - if (!anthropicHost.includes(DEFAULT_ANTHROPIC_HOST) && !anthropicHost.includes(DEFAULT_HELICONE_ANTHROPIC_HOST)) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'The Helicone Anthropic Key has been provided, but the host is set to custom. Please fix it in the Models Setup page.' }); - anthropicHost = `https://${DEFAULT_HELICONE_ANTHROPIC_HOST}`; - } - - // 2024-10-22: we don't support this yet, but the Anthropic SDK has `dangerouslyAllowBrowser: true` - // to use the API from Browsers via CORS - - return { - headers: { - 'Accept': 'application/json', - 'Content-Type': 'application/json', - ..._anthropicHeaders(options), - 'X-API-Key': anthropicKey, - ...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }), - }, - url: anthropicHost + apiPath, - }; -} - // Input Schemas -export const anthropicAccessSchema = z.object({ - dialect: z.literal('anthropic'), - clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated - anthropicKey: z.string().trim(), - anthropicHost: z.string().trim().nullable(), - heliconeKey: z.string().trim().nullable(), -}); -export type AnthropicAccessSchema = z.infer; - const listModelsInputSchema = z.object({ access: anthropicAccessSchema, }); diff --git a/src/modules/llms/server/gemini/gemini.access.ts b/src/modules/llms/server/gemini/gemini.access.ts new file mode 100644 index 000000000..b19b44875 --- /dev/null +++ b/src/modules/llms/server/gemini/gemini.access.ts @@ -0,0 +1,81 @@ +/** + * Isomorphic Gemini API access - works on both server and client. + * + * This module only imports zod for schema definition and provides access logic + * that works identically on server and client environments. + * + * Server: Uses header-based auth (x-goog-api-key) with package version + * Client: Uses query param auth (?key=) for CORS compatibility + */ +import * as z from 'zod/v4'; +import { TRPCError } from '@trpc/server'; + +import packageJson from '../../../../../package.json'; + +import { env } from '~/server/env.server'; + +import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes'; + +import { llmsFixupHost, llmsRandomKeyFromMultiKey } from '../openai/openai.access'; + + +// configuration +const DEFAULT_GEMINI_HOST = 'https://generativelanguage.googleapis.com'; + + +// --- Gemini Access --- + +export type GeminiAccessSchema = z.infer; +export const geminiAccessSchema = z.object({ + dialect: z.enum(['gemini']), + clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated + geminiKey: z.string(), + geminiHost: z.string(), + minSafetyLevel: GeminiWire_Safety.HarmBlockThreshold_enum, +}); + + +export function geminiAccess(access: GeminiAccessSchema, modelRefId: string | null, apiPath: string, useV1Alpha: boolean): { headers: HeadersInit, url: string } { + + const geminiHost = llmsFixupHost(access.geminiHost || DEFAULT_GEMINI_HOST, apiPath); + let geminiKey = access.geminiKey || env.GEMINI_API_KEY || ''; + + // multi-key with random selection - https://github.com/enricoros/big-AGI/issues/653 + geminiKey = llmsRandomKeyFromMultiKey(geminiKey); + + // validate key + if (!geminiKey) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Gemini API Key' }); + + // update model-dependent paths + if (apiPath.includes('{model=models/*}')) { + if (!modelRefId) + throw new TRPCError({ code: 'BAD_REQUEST', message: `geminiAccess: modelRefId is required for ${apiPath}` }); + apiPath = apiPath.replace('{model=models/*}', modelRefId); + } + + // [Gemini, 2025-01-23] CoT support - requires `v1alpha` Gemini API + if (useV1Alpha) + apiPath = apiPath.replaceAll('v1beta', 'v1alpha'); + + // [CSF] build headers and URL + if (access.clientSideFetch) { + const separator = apiPath.includes('?') ? '&' : '?'; + return { + headers: { + 'Content-Type': 'application/json', + }, + url: `${geminiHost}${apiPath}${separator}key=${geminiKey}`, + }; + } + + // server-side fetch + return { + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-client': `big-agi/${packageJson['version'] || '1.0.0'}`, + 'x-goog-api-key': geminiKey, + }, + url: geminiHost + apiPath, + }; +} diff --git a/src/modules/llms/server/gemini/gemini.router.ts b/src/modules/llms/server/gemini/gemini.router.ts index 3eaedeb22..6a9f07b77 100644 --- a/src/modules/llms/server/gemini/gemini.router.ts +++ b/src/modules/llms/server/gemini/gemini.router.ts @@ -1,61 +1,16 @@ import * as z from 'zod/v4'; -import { TRPCError } from '@trpc/server'; -import { env } from '~/server/env.server'; - -import packageJson from '../../../../../package.json'; import { createTRPCRouter, edgeProcedure } from '~/server/trpc/trpc.server'; import { fetchJsonOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; -import { GeminiWire_Safety } from '~/modules/aix/server/dispatch/wiretypes/gemini.wiretypes'; - import { ListModelsResponse_schema } from '../llm.server.types'; -import { fixupHost } from '../openai/openai.router'; import { listModelsRunDispatch } from '../listModels.dispatch'; - -// Default hosts -const DEFAULT_GEMINI_HOST = 'https://generativelanguage.googleapis.com'; +import { geminiAccess, geminiAccessSchema, GeminiAccessSchema } from './gemini.access'; // Mappers -export function geminiAccess(access: GeminiAccessSchema, modelRefId: string | null, apiPath: string, useV1Alpha: boolean): { headers: HeadersInit, url: string } { - - const geminiHost = fixupHost(access.geminiHost || DEFAULT_GEMINI_HOST, apiPath); - let geminiKey = access.geminiKey || env.GEMINI_API_KEY || ''; - - // multi-key with random selection - https://github.com/enricoros/big-AGI/issues/653 - if (geminiKey.includes(',')) { - const multiKeys = geminiKey - .split(',') - .map(key => key.trim()) - .filter(Boolean); - geminiKey = multiKeys[Math.floor(Math.random() * multiKeys.length)]; - } - - // update model-dependent paths - if (apiPath.includes('{model=models/*}')) { - if (!modelRefId) - throw new TRPCError({ code: 'BAD_REQUEST', message: `geminiAccess: modelRefId is required for ${apiPath}` }); - apiPath = apiPath.replace('{model=models/*}', modelRefId); - } - - // [Gemini, 2025-01-23] CoT support - requires `v1alpha` Gemini API - if (useV1Alpha) - apiPath = apiPath.replaceAll('v1beta', 'v1alpha'); - - return { - headers: { - 'Content-Type': 'application/json', - 'x-goog-api-client': `big-agi/${packageJson['version'] || '1.0.0'}`, - 'x-goog-api-key': geminiKey, - }, - url: geminiHost + apiPath, - }; -} - - async function geminiGET(access: GeminiAccessSchema, modelRefId: string | null, apiPath: string /*, signal?: AbortSignal*/, useV1Alpha: boolean): Promise { const { headers, url } = geminiAccess(access, modelRefId, apiPath, useV1Alpha); return await fetchJsonOrTRPCThrow({ url, headers, name: 'Gemini' }); @@ -67,17 +22,7 @@ async function geminiPOST(access: } -// Input/Output Schemas - -export const geminiAccessSchema = z.object({ - dialect: z.enum(['gemini']), - clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated - geminiKey: z.string(), - geminiHost: z.string(), - minSafetyLevel: GeminiWire_Safety.HarmBlockThreshold_enum, -}); -export type GeminiAccessSchema = z.infer; - +// Router Input/Output Schemas const accessOnlySchema = z.object({ access: geminiAccessSchema, diff --git a/src/modules/llms/server/ollama/ollama.access.ts b/src/modules/llms/server/ollama/ollama.access.ts new file mode 100644 index 000000000..dec22d937 --- /dev/null +++ b/src/modules/llms/server/ollama/ollama.access.ts @@ -0,0 +1,40 @@ +/** + * Isomorphic Ollama API access - works on both server and client. + * + * This module only imports zod for schema definition and provides access logic + * that works identically on server and client environments. + */ + +import * as z from 'zod/v4'; + +import { env } from '~/server/env.server'; + +import { llmsFixupHost } from '../openai/openai.access'; + + +// configuration +const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434'; + + +// --- Ollama Access --- + +export type OllamaAccessSchema = z.infer; +export const ollamaAccessSchema = z.object({ + dialect: z.enum(['ollama']), + clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated + ollamaHost: z.string().trim(), + ollamaJson: z.boolean(), +}); + + +export function ollamaAccess(access: OllamaAccessSchema, apiPath: string): { headers: HeadersInit, url: string } { + + const ollamaHost = llmsFixupHost(access.ollamaHost || env.OLLAMA_API_HOST || DEFAULT_OLLAMA_HOST, apiPath); + + return { + headers: { + 'Content-Type': 'application/json', + }, + url: ollamaHost + apiPath, + }; +} diff --git a/src/modules/llms/server/ollama/ollama.router.ts b/src/modules/llms/server/ollama/ollama.router.ts index 91f459f17..7a3b0e759 100644 --- a/src/modules/llms/server/ollama/ollama.router.ts +++ b/src/modules/llms/server/ollama/ollama.router.ts @@ -2,83 +2,16 @@ import * as z from 'zod/v4'; import { TRPCError } from '@trpc/server'; import { createTRPCRouter, edgeProcedure } from '~/server/trpc/trpc.server'; -import { env } from '~/server/env.server'; import { fetchTextOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers'; import { serverCapitalizeFirstLetter } from '~/server/wire'; import { ListModelsResponse_schema } from '../llm.server.types'; -import { fixupHost } from '../openai/openai.router'; import { listModelsRunDispatch } from '../listModels.dispatch'; import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models'; +import { ollamaAccess, ollamaAccessSchema } from './ollama.access'; -// configuration -const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434'; - - -// Mappers - -export function ollamaAccess(access: OllamaAccessSchema, apiPath: string): { headers: HeadersInit, url: string } { - - const ollamaHost = fixupHost(access.ollamaHost || env.OLLAMA_API_HOST || DEFAULT_OLLAMA_HOST, apiPath); - - return { - headers: { - 'Content-Type': 'application/json', - }, - url: ollamaHost + apiPath, - }; - -} - - -/*export const ollamaChatCompletionPayload = (model: OpenAIModelSchema, history: OpenAIHistorySchema, jsonOutput: boolean, stream: boolean): WireOllamaChatCompletionInput => ({ - model: model.id, - messages: history, - options: { - ...(model.temperature !== undefined && { temperature: model.temperature }), - }, - ...(jsonOutput && { format: 'json' }), - // n: ... - // functions: ... - // function_call: ... - stream, -});*/ - - -/* Unused: switched to the Chat endpoint (above). The implementation is left here for reference. -https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion -export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean) { - - // if the first message is the system prompt, extract it - let systemPrompt: string | undefined = undefined; - if (history.length && history[0].role === 'system') { - const [firstMessage, ...rest] = history; - systemPrompt = firstMessage.content; - history = rest; - } - - // encode the prompt for ollama, assuming the same template for everyone for now - const prompt = history.map(({ role, content }) => { - return role === 'assistant' ? `\n\nAssistant: ${content}` : `\n\nHuman: ${content}`; - }).join('') + '\n\nAssistant:\n'; - - // const prompt = history.map(({ role, content }) => { - // return role === 'assistant' ? `### Response:\n${content}\n\n` : `### User:\n${content}\n\n`; - // }).join('') + '### Response:\n'; - - return { - model: model.id, - prompt, - options: { - ...(model.temperature !== undefined && { temperature: model.temperature }), - }, - ...(systemPrompt && { system: systemPrompt }), - stream, - }; -}*/ - // async function ollamaGET(access: OllamaAccessSchema, apiPath: string /*, signal?: AbortSignal*/): Promise { // const { headers, url } = ollamaAccess(access, apiPath); // return await fetchJsonOrTRPCThrow({ url, headers, name: 'Ollama' }); @@ -90,15 +23,7 @@ export function ollamaCompletionPayload(model: OpenAIModelSchema, history: OpenA // } -// Input/Output Schemas - -export const ollamaAccessSchema = z.object({ - dialect: z.enum(['ollama']), - clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated - ollamaHost: z.string().trim(), - ollamaJson: z.boolean(), -}); -export type OllamaAccessSchema = z.infer; +// Router Input/Output Schemas const accessOnlySchema = z.object({ access: ollamaAccessSchema, @@ -125,6 +50,18 @@ const listPullableOutputSchema = z.object({ export const llmOllamaRouter = createTRPCRouter({ + /* Ollama: List the Models available */ + listModels: edgeProcedure + .input(accessOnlySchema) + .output(ListModelsResponse_schema) + .query(async ({ ctx, input, signal }) => { + + const models = await listModelsRunDispatch(input.access, signal); + + return { models }; + }), + + /* Ollama: models that can be pulled */ adminListPullable: edgeProcedure .input(accessOnlySchema) @@ -176,16 +113,4 @@ export const llmOllamaRouter = createTRPCRouter({ throw new TRPCError({ code: 'BAD_REQUEST', message: 'Ollama delete issue: ' + deleteOutput }); }), - - /* Ollama: List the Models available */ - listModels: edgeProcedure - .input(accessOnlySchema) - .output(ListModelsResponse_schema) - .query(async ({ input, signal }) => { - - const models = await listModelsRunDispatch(input.access, signal); - - return { models }; - }), - }); diff --git a/src/modules/llms/server/openai/models/azure.models.ts b/src/modules/llms/server/openai/models/azure.models.ts index 410972ef8..5250390cb 100644 --- a/src/modules/llms/server/openai/models/azure.models.ts +++ b/src/modules/llms/server/openai/models/azure.models.ts @@ -1,18 +1,11 @@ import * as z from 'zod/v4'; -import { TRPCError } from '@trpc/server'; - -import { env } from '~/server/env.server'; // import { LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types'; -import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.server.types'; - -import type { OpenAIAccessSchema } from '../openai.router'; -import { fixupHost } from '../openai.router'; - -import { fromManualMapping, ManualMappings } from '../../models.mappings'; +import type { ModelDescriptionSchema } from '../../llm.server.types'; import { _fallbackOpenAIModel, _knownOpenAIChatModels } from './openai.models'; +import { fromManualMapping, ManualMappings } from '../../models.mappings'; // configuration @@ -150,88 +143,3 @@ export function azureDeploymentToModelDescription(deployment: AzureOpenAIDeploym ...restOfModelDescription, }; } - - -function _azureServerSideVars() { - return { - apiKey: env.AZURE_OPENAI_API_KEY || '', - apiEndpoint: env.AZURE_OPENAI_API_ENDPOINT || '', - // 'v1' is the next-gen API, which doesn't have a monthly version string anymore - apiEnableV1: env.AZURE_OPENAI_DISABLE_V1 !== 'true', - // https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key - versionAzureOpenAI: env.AZURE_OPENAI_API_VERSION || '2025-04-01-preview', - // old-school API used to list deployments - still needed for listing models, as even /v1/models would list any model available on azure and not just the deployed ones - versionDeployments: env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview', - }; -} - -export function azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues { - - // Server-side configuration, with defaults - const server = _azureServerSideVars(); - - // Client-provided values always take precedence over server env vars - const azureKey = access.oaiKey || server.apiKey || ''; - const azureHostFixed = fixupHost(access.oaiHost || server.apiEndpoint || '', apiPath); - - // Normalize to origin only (discard path/query) to prevent malformed URLs - let azureBase: string; - try { - azureBase = new URL(azureHostFixed).origin; - } catch (e) { - throw new TRPCError({ code: 'BAD_REQUEST', message: `Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}` }); - } - - if (!azureKey || !azureBase) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); - - /** - * Azure OpenAI API Routing: Convert OpenAI standard paths to Azure-specific paths - * - * Azure supports two API patterns: - * 1. Next-gen v1 API (/openai/v1/...): Direct endpoints without deployment IDs - * - Used for GPT-5-like models with advanced features - * - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true - * 2. Traditional deployment-based API (/openai/deployments/{id}/...): Legacy pattern - * - Required for older models and when v1 API is disabled - * - Requires deployment ID for all API calls - */ - switch (true) { - - // List models - case apiPath === '/v1/models': - // uses the good old Azure OpenAI Deployments listing API - apiPath = `/openai/deployments?api-version=${server.versionDeployments}`; - break; - - // Responses API - next-gen v1 API - case apiPath === '/v1/responses' && server.apiEnableV1: - // Next-gen v1 API: direct endpoint without deployment path - apiPath = '/openai/v1/responses'; // NOTE: we seem to not need the api-version query param here - // apiPath = `/openai/v1/responses?api-version=${server.versionResponses}`; - // console.log('[Azure] Using next-gen v1 API for Responses:', apiPath); - break; - - // Chat Completions API, and other v1 APIs - case apiPath === '/v1/chat/completions' || apiPath === '/v1/responses' || apiPath.startsWith('/v1/'): - - // require the model Id for traditional deployment-based routing - if (!modelRefId) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Azure OpenAI API needs a deployment id' }); - - const functionName = apiPath.replace('/v1/', ''); // e.g. 'chat/completions' - apiPath = `/openai/deployments/${modelRefId}/${functionName}?api-version=${server.versionAzureOpenAI}`; - break; - - default: - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Azure OpenAI API path not supported: ' + apiPath }); - } - - return { - headers: { - 'Content-Type': 'application/json', - 'api-key': azureKey, - }, - url: azureBase + apiPath, - }; -} diff --git a/src/modules/llms/server/openai/openai.access.ts b/src/modules/llms/server/openai/openai.access.ts new file mode 100644 index 000000000..d05714104 --- /dev/null +++ b/src/modules/llms/server/openai/openai.access.ts @@ -0,0 +1,436 @@ +/** + * Isomorphic OpenAI-compatible API access - works on both server and client. + * + * This module only imports zod for schema definition and provides access logic + * that works identically on server and client environments. + * + * Supports 14 OpenAI-compatible dialects: alibaba, azure, deepseek, groq, lmstudio, + * localai, mistral, moonshot, openai, openpipe, openrouter, perplexity, togetherai, xai + */ + +import * as z from 'zod/v4'; +import { TRPCError } from '@trpc/server'; + +import { BaseProduct } from '~/common/app.release'; + +import { env } from '~/server/env.server'; + +import type { RequestAccessValues } from '../llm.server.types'; + + +// configuration +const DEFAULT_ALIBABA_HOST = 'https://dashscope-intl.aliyuncs.com/compatible-mode'; +const DEFAULT_DEEPSEEK_HOST = 'https://api.deepseek.com'; +const DEFAULT_GROQ_HOST = 'https://api.groq.com/openai'; +const DEFAULT_HELICONE_OPENAI_HOST = 'oai.hconeai.com'; +const DEFAULT_LMSTUDIO_HOST = 'http://localhost:1234'; +const DEFAULT_LOCALAI_HOST = 'http://127.0.0.1:8080'; +const DEFAULT_MISTRAL_HOST = 'https://api.mistral.ai'; +const DEFAULT_MOONSHOT_HOST = 'https://api.moonshot.ai'; +const DEFAULT_OPENAI_HOST = 'api.openai.com'; +const DEFAULT_OPENPIPE_HOST = 'https://app.openpipe.ai/api'; +const DEFAULT_OPENROUTER_HOST = 'https://openrouter.ai/api'; +const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai'; +const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz'; +const DEFAULT_XAI_HOST = 'https://api.x.ai'; + + +// --- Fixup Host (all accesses) --- + +/** Add https if missing, and remove trailing slash if present and the path starts with a slash. */ +export function llmsFixupHost(host: string, apiPath: string): string { + if (!host) + return ''; + if (!host.startsWith('http')) + host = `https://${host}`; + if (host.endsWith('/') && apiPath.startsWith('/')) + host = host.slice(0, -1); + return host; +} + +/** Select a random key from a comma-separated list of API keys, used to load balance. */ +export function llmsRandomKeyFromMultiKey(multiKeyString: string): string { + if (!multiKeyString.includes(',')) + return multiKeyString; + + const multiKeys = multiKeyString + .split(',') + .map(key => key.trim()) + .filter(Boolean); + + if (!multiKeys.length) + return ''; + + return multiKeys[Math.floor(Math.random() * multiKeys.length)]; +} + + +// --- OpenAI-Compatible Access --- + +export type OpenAIDialects = OpenAIAccessSchema['dialect']; +export type OpenAIAccessSchema = z.infer; +export const openAIAccessSchema = z.object({ + dialect: z.enum([ + 'alibaba', 'azure', 'deepseek', 'groq', 'lmstudio', + 'localai', 'mistral', 'moonshot', 'openai', 'openpipe', + 'openrouter', 'perplexity', 'togetherai', 'xai', + ]), + clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated + oaiKey: z.string().trim(), + oaiOrg: z.string().trim(), // [OpenPipe] we have a hack here, where we put the tags stringified JSON in here - cleanup in the future + oaiHost: z.string().trim(), + heliKey: z.string().trim(), + moderationCheck: z.boolean(), +}); + +export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): { headers: HeadersInit, url: string } { + switch (access.dialect) { + + case 'alibaba': + let alibabaOaiKey = access.oaiKey || env.ALIBABA_API_KEY || ''; + const alibabaOaiHost = llmsFixupHost(access.oaiHost || env.ALIBABA_API_HOST || DEFAULT_ALIBABA_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + alibabaOaiKey = llmsRandomKeyFromMultiKey(alibabaOaiKey); + + if (!alibabaOaiKey || !alibabaOaiHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Alibaba API Key. Add it on the UI or server side (your deployment).' }); + + return { + headers: { + 'Authorization': `Bearer ${alibabaOaiKey}`, + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }, + url: alibabaOaiHost + apiPath, + }; + + case 'azure': + return _azureOpenAIAccess(access, modelRefId, apiPath); + + case 'deepseek': + // https://platform.deepseek.com/api-docs/ + let deepseekKey = access.oaiKey || env.DEEPSEEK_API_KEY || ''; + const deepseekHost = llmsFixupHost(access.oaiHost || DEFAULT_DEEPSEEK_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + deepseekKey = llmsRandomKeyFromMultiKey(deepseekKey); + + if (!deepseekKey || !deepseekHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Deepseek API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); + + return { + headers: { + 'Authorization': `Bearer ${deepseekKey}`, + 'Content-Type': 'application/json', + }, + url: deepseekHost + apiPath, + }; + + case 'groq': + let groqKey = access.oaiKey || env.GROQ_API_KEY || ''; + const groqHost = llmsFixupHost(access.oaiHost || DEFAULT_GROQ_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + groqKey = llmsRandomKeyFromMultiKey(groqKey); + + if (!groqKey) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Groq API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); + + return { + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'Authorization': `Bearer ${groqKey}`, + }, + url: groqHost + apiPath, + }; + + case 'lmstudio': + const lmsAIKey = access.oaiKey || ''; + let lmsAIHost = llmsFixupHost(access.oaiHost || DEFAULT_LMSTUDIO_HOST, apiPath); + return { + headers: { + 'Content-Type': 'application/json', + ...(lmsAIKey && { Authorization: `Bearer ${lmsAIKey}` }), + }, + url: lmsAIHost + apiPath, + }; + + case 'localai': + const localAIKey = access.oaiKey || env.LOCALAI_API_KEY || ''; + let localAIHost = llmsFixupHost(access.oaiHost || env.LOCALAI_API_HOST || DEFAULT_LOCALAI_HOST, apiPath); + return { + headers: { + 'Content-Type': 'application/json', + ...(localAIKey && { Authorization: `Bearer ${localAIKey}` }), + }, + url: localAIHost + apiPath, + }; + + case 'mistral': + // https://docs.mistral.ai/platform/client + let mistralKey = access.oaiKey || env.MISTRAL_API_KEY || ''; + const mistralHost = llmsFixupHost(access.oaiHost || DEFAULT_MISTRAL_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + mistralKey = llmsRandomKeyFromMultiKey(mistralKey); + + return { + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'Authorization': `Bearer ${mistralKey}`, + }, + url: mistralHost + apiPath, + }; + + case 'moonshot': + // https://platform.moonshot.ai/docs/api/chat + let moonshotKey = access.oaiKey || env.MOONSHOT_API_KEY || ''; + const moonshotHost = llmsFixupHost(access.oaiHost || DEFAULT_MOONSHOT_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + moonshotKey = llmsRandomKeyFromMultiKey(moonshotKey); + + if (!moonshotKey || !moonshotHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Moonshot API Key or Host. Add it on the UI or server side.' }); + + return { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${moonshotKey}`, + }, + url: moonshotHost + apiPath, + }; + + case 'openai': + const oaiKey = access.oaiKey || env.OPENAI_API_KEY || ''; + const oaiOrg = access.oaiOrg || env.OPENAI_API_ORG_ID || ''; + let oaiHost = llmsFixupHost(access.oaiHost || env.OPENAI_API_HOST || DEFAULT_OPENAI_HOST, apiPath); + // warn if no key - only for default (non-overridden) hosts + if (!oaiKey && oaiHost.indexOf(DEFAULT_OPENAI_HOST) !== -1) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenAI API Key. Add it on the UI or server side (your deployment).' }); + + // [Helicone] + // We don't change the host (as we do on Anthropic's), as we expect the user to have a custom host. + let heliKey = access.heliKey || env.HELICONE_API_KEY || false; + if (heliKey) { + if (oaiHost.includes(DEFAULT_OPENAI_HOST)) { + oaiHost = `https://${DEFAULT_HELICONE_OPENAI_HOST}`; + } else if (!oaiHost.includes(DEFAULT_HELICONE_OPENAI_HOST)) { + // throw new Error(`The Helicone OpenAI Key has been provided, but the host is not set to https://${DEFAULT_HELICONE_OPENAI_HOST}. Please fix it in the Models Setup page.`); + heliKey = false; + } + } + + // [Cloudflare OpenAI AI Gateway support] + // Adapts the API path when using a 'universal' or 'openai' Cloudflare AI Gateway endpoint in the "API Host" field + if (oaiHost.includes('https://gateway.ai.cloudflare.com')) { + const parsedUrl = new URL(oaiHost); + const pathSegments = parsedUrl.pathname.split('/').filter(segment => segment.length > 0); + + // The expected path should be: /v1/// + if (pathSegments.length < 3 || pathSegments.length > 4 || pathSegments[0] !== 'v1') + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Cloudflare AI Gateway API Host is not valid. Please check the API Host field in the Models Setup page.' }); + + const [_v1, accountTag, gatewayName, provider] = pathSegments; + if (provider && provider !== 'openai') + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Cloudflare AI Gateway only supports OpenAI as a provider.' }); + + if (apiPath.startsWith('/v1')) + apiPath = apiPath.replace('/v1', ''); + + oaiHost = 'https://gateway.ai.cloudflare.com'; + apiPath = `/v1/${accountTag}/${gatewayName}/${provider || 'openai'}${apiPath}`; + } + + return { + headers: { + 'Content-Type': 'application/json', + ...(oaiKey && { Authorization: `Bearer ${oaiKey}` }), + ...(oaiOrg && { 'OpenAI-Organization': oaiOrg }), + ...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }), + }, + url: oaiHost + apiPath, + }; + + case 'openpipe': + const openPipeKey = access.oaiKey || env.OPENPIPE_API_KEY || ''; + if (!openPipeKey) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenPipe API Key or Host. Add it on the UI or server side (your deployment).' }); + + return { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${openPipeKey}`, + 'op-log-request': 'true', + ...(access.oaiOrg && { 'op-tags': access.oaiOrg }), + }, + url: llmsFixupHost(DEFAULT_OPENPIPE_HOST, apiPath) + apiPath, + }; + + case 'openrouter': + let orKey = access.oaiKey || env.OPENROUTER_API_KEY || ''; + const orHost = llmsFixupHost(access.oaiHost || DEFAULT_OPENROUTER_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + orKey = llmsRandomKeyFromMultiKey(orKey); + + if (!orKey || !orHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenRouter API Key or Host. Add it on the UI or server side (your deployment).' }); + + return { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${orKey}`, + 'HTTP-Referer': BaseProduct.ProductURL, + 'X-Title': BaseProduct.ProductName, + }, + url: orHost + apiPath, + }; + + case 'perplexity': + let perplexityKey = access.oaiKey || env.PERPLEXITY_API_KEY || ''; + const perplexityHost = llmsFixupHost(access.oaiHost || DEFAULT_PERPLEXITY_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + perplexityKey = llmsRandomKeyFromMultiKey(perplexityKey); + + if (!perplexityKey || !perplexityHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Perplexity API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); + + if (apiPath.startsWith('/v1')) + apiPath = apiPath.replace('/v1', ''); + + return { + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'Authorization': `Bearer ${perplexityKey}`, + }, + url: perplexityHost + apiPath, + }; + + case 'togetherai': + let togetherKey = access.oaiKey || env.TOGETHERAI_API_KEY || ''; + const togetherHost = llmsFixupHost(access.oaiHost || DEFAULT_TOGETHERAI_HOST, apiPath); + + // Use function to select a random key if multiple keys are provided + togetherKey = llmsRandomKeyFromMultiKey(togetherKey); + + if (!togetherKey || !togetherHost) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing TogetherAI API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); + + return { + headers: { + 'Authorization': `Bearer ${togetherKey}`, + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }, + url: togetherHost + apiPath, + }; + + case 'xai': + let xaiKey = access.oaiKey || env.XAI_API_KEY || ''; + + // Use function to select a random key if multiple keys are provided + xaiKey = llmsRandomKeyFromMultiKey(xaiKey); + + if (!xaiKey) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing xAI API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); + + return { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${xaiKey}`, + }, + url: DEFAULT_XAI_HOST + apiPath, + }; + + } +} + +function _azureServerSideVars() { + return { + apiKey: env.AZURE_OPENAI_API_KEY || '', + apiEndpoint: env.AZURE_OPENAI_API_ENDPOINT || '', + // 'v1' is the next-gen API, which doesn't have a monthly version string anymore + apiEnableV1: env.AZURE_OPENAI_DISABLE_V1 !== 'true', + // https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key + versionAzureOpenAI: env.AZURE_OPENAI_API_VERSION || '2025-04-01-preview', + // old-school API used to list deployments - still needed for listing models, as even /v1/models would list any model available on azure and not just the deployed ones + versionDeployments: env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview', + }; +} + +function _azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues { + + // Server-side configuration, with defaults + const server = _azureServerSideVars(); + + // Client-provided values always take precedence over server env vars + const azureKey = access.oaiKey || server.apiKey || ''; + const azureHostFixed = llmsFixupHost(access.oaiHost || server.apiEndpoint || '', apiPath); + + // Normalize to origin only (discard path/query) to prevent malformed URLs + let azureBase: string; + try { + azureBase = new URL(azureHostFixed).origin; + } catch (e) { + throw new TRPCError({ code: 'BAD_REQUEST', message: `Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}` }); + } + + if (!azureKey || !azureBase) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); + + /** + * Azure OpenAI API Routing: Convert OpenAI standard paths to Azure-specific paths + * + * Azure supports two API patterns: + * 1. Next-gen v1 API (/openai/v1/...): Direct endpoints without deployment IDs + * - Used for GPT-5-like models with advanced features + * - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true + * 2. Traditional deployment-based API (/openai/deployments/{id}/...): Legacy pattern + * - Required for older models and when v1 API is disabled + * - Requires deployment ID for all API calls + */ + switch (true) { + + // List models + case apiPath === '/v1/models': + // uses the good old Azure OpenAI Deployments listing API + apiPath = `/openai/deployments?api-version=${server.versionDeployments}`; + break; + + // Responses API - next-gen v1 API + case apiPath === '/v1/responses' && server.apiEnableV1: + // Next-gen v1 API: direct endpoint without deployment path + apiPath = '/openai/v1/responses'; // NOTE: we seem to not need the api-version query param here + // apiPath = `/openai/v1/responses?api-version=${server.versionResponses}`; + // console.log('[Azure] Using next-gen v1 API for Responses:', apiPath); + break; + + // Chat Completions API, and other v1 APIs + case apiPath === '/v1/chat/completions' || apiPath === '/v1/responses' || apiPath.startsWith('/v1/'): + + // require the model Id for traditional deployment-based routing + if (!modelRefId) + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Azure OpenAI API needs a deployment id' }); + + const functionName = apiPath.replace('/v1/', ''); // e.g. 'chat/completions' + apiPath = `/openai/deployments/${modelRefId}/${functionName}?api-version=${server.versionAzureOpenAI}`; + break; + + default: + throw new TRPCError({ code: 'BAD_REQUEST', message: 'Azure OpenAI API path not supported: ' + apiPath }); + } + + return { + headers: { + 'Content-Type': 'application/json', + 'api-key': azureKey, + }, + url: azureBase + apiPath, + }; +} diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts index e36020f60..f2f66ed9c 100644 --- a/src/modules/llms/server/openai/openai.router.ts +++ b/src/modules/llms/server/openai/openai.router.ts @@ -2,7 +2,6 @@ import * as z from 'zod/v4'; import { TRPCError } from '@trpc/server'; import { createTRPCRouter, edgeProcedure } from '~/server/trpc/trpc.server'; -import { env } from '~/server/env.server'; import { fetchJsonOrTRPCThrow, TRPCFetcherError } from '~/server/trpc/trpc.router.fetchers'; import { serverCapitalizeFirstLetter } from '~/server/wire'; @@ -10,59 +9,15 @@ import type { T2ICreateImageAsyncStreamOp } from '~/modules/t2i/t2i.server'; import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Moderations_Create } from '~/modules/aix/server/dispatch/wiretypes/openai.wiretypes'; import { heartbeatsWhileAwaiting } from '~/modules/aix/server/dispatch/heartbeatsWhileAwaiting'; -import { BaseProduct } from '~/common/app.release'; - -import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types'; -import { azureOpenAIAccess } from './models/azure.models'; -import { listModelsRunDispatch } from '../listModels.dispatch'; import { wireLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './wiretypes/localai.wiretypes'; +import { ListModelsResponse_schema, ModelDescriptionSchema } from '../llm.server.types'; +import { listModelsRunDispatch } from '../listModels.dispatch'; -const openAIDialects = z.enum([ - 'alibaba', 'azure', 'deepseek', 'groq', 'lmstudio', 'localai', 'mistral', 'moonshot', 'openai', 'openpipe', 'openrouter', 'perplexity', 'togetherai', 'xai', -]); -export type OpenAIDialects = z.infer; - -export const openAIAccessSchema = z.object({ - dialect: openAIDialects, - clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated - oaiKey: z.string().trim(), - oaiOrg: z.string().trim(), // [OpenPipe] we have a hack here, where we put the tags stringified JSON in here - cleanup in the future - oaiHost: z.string().trim(), - heliKey: z.string().trim(), - moderationCheck: z.boolean(), -}); -export type OpenAIAccessSchema = z.infer; - -// export const openAIModelSchema = z.object({ -// id: z.string(), -// temperature: z.number().min(0).max(2).optional(), -// maxTokens: z.number().min(1).optional(), -// }); -// export type OpenAIModelSchema = z.infer; - -// export const openAIHistorySchema = z.array(z.object({ -// role: z.enum(['assistant', 'system', 'user'/*, 'function'*/]), -// content: z.string(), -// })); -// export type OpenAIHistorySchema = z.infer; +import { openAIAccess, OpenAIAccessSchema, openAIAccessSchema } from './openai.access'; -// Fixup host function - -/** Add https if missing, and remove trailing slash if present and the path starts with a slash. */ -export function fixupHost(host: string, apiPath: string): string { - if (!host) - return ''; - if (!host.startsWith('http')) - host = `https://${host}`; - if (host.endsWith('/') && apiPath.startsWith('/')) - host = host.slice(0, -1); - return host; -} - - -// Router Input Schemas +// Router Input/Output Schemas const listModelsInputSchema = z.object({ access: openAIAccessSchema, @@ -393,307 +348,7 @@ export const llmOpenAIRouter = createTRPCRouter({ }); -const DEFAULT_ALIBABA_HOST = 'https://dashscope-intl.aliyuncs.com/compatible-mode'; -const DEFAULT_HELICONE_OPENAI_HOST = 'oai.hconeai.com'; -const DEFAULT_DEEPSEEK_HOST = 'https://api.deepseek.com'; -const DEFAULT_GROQ_HOST = 'https://api.groq.com/openai'; -const DEFAULT_LOCALAI_HOST = 'http://127.0.0.1:8080'; -const DEFAULT_MISTRAL_HOST = 'https://api.mistral.ai'; -const DEFAULT_MOONSHOT_HOST = 'https://api.moonshot.ai'; -const DEFAULT_OPENAI_HOST = 'api.openai.com'; -const DEFAULT_OPENPIPE_HOST = 'https://app.openpipe.ai/api'; -const DEFAULT_OPENROUTER_HOST = 'https://openrouter.ai/api'; -const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai'; -const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz'; -const DEFAULT_XAI_HOST = 'https://api.x.ai'; - - -/** - * Get a random key from a comma-separated list of API keys - * @param multiKeyString Comma-separated string of API keys - * @returns A randomly selected single API key - */ -function getRandomKeyFromMultiKey(multiKeyString: string): string { - if (!multiKeyString.includes(',')) - return multiKeyString; - - const multiKeys = multiKeyString - .split(',') - .map(key => key.trim()) - .filter(Boolean); - - if (!multiKeys.length) - return ''; - - return multiKeys[Math.floor(Math.random() * multiKeys.length)]; -} - -export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues { - switch (access.dialect) { - - case 'alibaba': - let alibabaOaiKey = access.oaiKey || env.ALIBABA_API_KEY || ''; - const alibabaOaiHost = fixupHost(access.oaiHost || env.ALIBABA_API_HOST || DEFAULT_ALIBABA_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - alibabaOaiKey = getRandomKeyFromMultiKey(alibabaOaiKey); - - if (!alibabaOaiKey || !alibabaOaiHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Alibaba API Key. Add it on the UI or server side (your deployment).' }); - - return { - headers: { - 'Authorization': `Bearer ${alibabaOaiKey}`, - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - url: alibabaOaiHost + apiPath, - }; - - - case 'azure': - return azureOpenAIAccess(access, modelRefId, apiPath); - - - case 'deepseek': - // https://platform.deepseek.com/api-docs/ - let deepseekKey = access.oaiKey || env.DEEPSEEK_API_KEY || ''; - const deepseekHost = fixupHost(access.oaiHost || DEFAULT_DEEPSEEK_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - deepseekKey = getRandomKeyFromMultiKey(deepseekKey); - - if (!deepseekKey || !deepseekHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Deepseek API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); - - return { - headers: { - 'Authorization': `Bearer ${deepseekKey}`, - 'Content-Type': 'application/json', - }, - url: deepseekHost + apiPath, - }; - - - case 'lmstudio': - case 'openai': - const oaiKey = access.oaiKey || env.OPENAI_API_KEY || ''; - const oaiOrg = access.oaiOrg || env.OPENAI_API_ORG_ID || ''; - let oaiHost = fixupHost(access.oaiHost || env.OPENAI_API_HOST || DEFAULT_OPENAI_HOST, apiPath); - // warn if no key - only for default (non-overridden) hosts - if (!oaiKey && oaiHost.indexOf(DEFAULT_OPENAI_HOST) !== -1) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenAI API Key. Add it on the UI or server side (your deployment).' }); - - // [Helicone] - // We don't change the host (as we do on Anthropic's), as we expect the user to have a custom host. - let heliKey = access.heliKey || env.HELICONE_API_KEY || false; - if (heliKey) { - if (oaiHost.includes(DEFAULT_OPENAI_HOST)) { - oaiHost = `https://${DEFAULT_HELICONE_OPENAI_HOST}`; - } else if (!oaiHost.includes(DEFAULT_HELICONE_OPENAI_HOST)) { - // throw new Error(`The Helicone OpenAI Key has been provided, but the host is not set to https://${DEFAULT_HELICONE_OPENAI_HOST}. Please fix it in the Models Setup page.`); - heliKey = false; - } - } - - // [Cloudflare OpenAI AI Gateway support] - // Adapts the API path when using a 'universal' or 'openai' Cloudflare AI Gateway endpoint in the "API Host" field - if (oaiHost.includes('https://gateway.ai.cloudflare.com')) { - const parsedUrl = new URL(oaiHost); - const pathSegments = parsedUrl.pathname.split('/').filter(segment => segment.length > 0); - - // The expected path should be: /v1/// - if (pathSegments.length < 3 || pathSegments.length > 4 || pathSegments[0] !== 'v1') - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Cloudflare AI Gateway API Host is not valid. Please check the API Host field in the Models Setup page.' }); - - const [_v1, accountTag, gatewayName, provider] = pathSegments; - if (provider && provider !== 'openai') - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Cloudflare AI Gateway only supports OpenAI as a provider.' }); - - if (apiPath.startsWith('/v1')) - apiPath = apiPath.replace('/v1', ''); - - oaiHost = 'https://gateway.ai.cloudflare.com'; - apiPath = `/v1/${accountTag}/${gatewayName}/${provider || 'openai'}${apiPath}`; - } - - return { - headers: { - 'Content-Type': 'application/json', - ...(oaiKey && { Authorization: `Bearer ${oaiKey}` }), - ...(oaiOrg && { 'OpenAI-Organization': oaiOrg }), - ...(heliKey && { 'Helicone-Auth': `Bearer ${heliKey}` }), - }, - url: oaiHost + apiPath, - }; - - case 'groq': - let groqKey = access.oaiKey || env.GROQ_API_KEY || ''; - const groqHost = fixupHost(access.oaiHost || DEFAULT_GROQ_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - groqKey = getRandomKeyFromMultiKey(groqKey); - - if (!groqKey) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Groq API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); - - return { - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': `Bearer ${groqKey}`, - }, - url: groqHost + apiPath, - }; - - - case 'localai': - const localAIKey = access.oaiKey || env.LOCALAI_API_KEY || ''; - let localAIHost = fixupHost(access.oaiHost || env.LOCALAI_API_HOST || DEFAULT_LOCALAI_HOST, apiPath); - return { - headers: { - 'Content-Type': 'application/json', - ...(localAIKey && { Authorization: `Bearer ${localAIKey}` }), - }, - url: localAIHost + apiPath, - }; - - - case 'mistral': - // https://docs.mistral.ai/platform/client - let mistralKey = access.oaiKey || env.MISTRAL_API_KEY || ''; - const mistralHost = fixupHost(access.oaiHost || DEFAULT_MISTRAL_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - mistralKey = getRandomKeyFromMultiKey(mistralKey); - - return { - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': `Bearer ${mistralKey}`, - }, - url: mistralHost + apiPath, - }; - - case 'moonshot': - // https://platform.moonshot.ai/docs/api/chat - let moonshotKey = access.oaiKey || env.MOONSHOT_API_KEY || ''; - const moonshotHost = fixupHost(access.oaiHost || DEFAULT_MOONSHOT_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - moonshotKey = getRandomKeyFromMultiKey(moonshotKey); - - if (!moonshotKey || !moonshotHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Moonshot API Key or Host. Add it on the UI or server side.' }); - - return { - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${moonshotKey}`, - }, - url: moonshotHost + apiPath, - }; - - - case 'openpipe': - const openPipeKey = access.oaiKey || env.OPENPIPE_API_KEY || ''; - if (!openPipeKey) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenPipe API Key or Host. Add it on the UI or server side (your deployment).' }); - - return { - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${openPipeKey}`, - 'op-log-request': 'true', - ...(access.oaiOrg && { 'op-tags': access.oaiOrg }), - }, - url: fixupHost(DEFAULT_OPENPIPE_HOST, apiPath) + apiPath, - }; - - case 'openrouter': - let orKey = access.oaiKey || env.OPENROUTER_API_KEY || ''; - const orHost = fixupHost(access.oaiHost || DEFAULT_OPENROUTER_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - orKey = getRandomKeyFromMultiKey(orKey); - - if (!orKey || !orHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing OpenRouter API Key or Host. Add it on the UI or server side (your deployment).' }); - - return { - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${orKey}`, - 'HTTP-Referer': BaseProduct.ProductURL, - 'X-Title': BaseProduct.ProductName, - }, - url: orHost + apiPath, - }; - - case 'perplexity': - let perplexityKey = access.oaiKey || env.PERPLEXITY_API_KEY || ''; - const perplexityHost = fixupHost(access.oaiHost || DEFAULT_PERPLEXITY_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - perplexityKey = getRandomKeyFromMultiKey(perplexityKey); - - if (!perplexityKey || !perplexityHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing Perplexity API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); - - if (apiPath.startsWith('/v1')) - apiPath = apiPath.replace('/v1', ''); - - return { - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': `Bearer ${perplexityKey}`, - }, - url: perplexityHost + apiPath, - }; - - - case 'togetherai': - let togetherKey = access.oaiKey || env.TOGETHERAI_API_KEY || ''; - const togetherHost = fixupHost(access.oaiHost || DEFAULT_TOGETHERAI_HOST, apiPath); - - // Use function to select a random key if multiple keys are provided - togetherKey = getRandomKeyFromMultiKey(togetherKey); - - if (!togetherKey || !togetherHost) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing TogetherAI API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).' }); - - return { - headers: { - 'Authorization': `Bearer ${togetherKey}`, - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - url: togetherHost + apiPath, - }; - - - case 'xai': - let xaiKey = access.oaiKey || env.XAI_API_KEY || ''; - - // Use function to select a random key if multiple keys are provided - xaiKey = getRandomKeyFromMultiKey(xaiKey); - - if (!xaiKey) - throw new TRPCError({ code: 'BAD_REQUEST', message: 'Missing xAI API Key. Add it on the UI (Models Setup) or server side (your deployment).' }); - - return { - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${xaiKey}`, - }, - url: DEFAULT_XAI_HOST + apiPath, - }; - - } -} - +// Mappers - all access logic is now in openai.access.ts async function openaiGETOrThrow(access: OpenAIAccessSchema, apiPath: string, signal: undefined | AbortSignal = undefined): Promise { const { headers, url } = openAIAccess(access, null, apiPath);