diff --git a/src/modules/llms/server/openai/models/azure.models.ts b/src/modules/llms/server/openai/models/azure.models.ts index 433d038f9..6f5e237cf 100644 --- a/src/modules/llms/server/openai/models/azure.models.ts +++ b/src/modules/llms/server/openai/models/azure.models.ts @@ -1,9 +1,14 @@ import * as z from 'zod/v4'; +import { env } from '~/server/env'; + // import { LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types'; import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types'; -import type { ModelDescriptionSchema } from '../../llm.server.types'; +import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.server.types'; + +import type { OpenAIAccessSchema } from '../openai.router'; +import { fixupHost } from '../openai.router'; import { fromManualMapping, ManualMappings } from './models.data'; import { _knownOpenAIChatModels } from './openai.models'; @@ -63,21 +68,20 @@ const _azureOpenAIDeployment_schema = z.object({ }); type AzureOpenAIDeployment = z.infer<typeof _azureOpenAIDeployment_schema>; -const _azureOpenAIDeploymentsList_schema = z.object({ - object: z.literal('list'), - data: z.array(_azureOpenAIDeployment_schema), -}); - +// const _azureOpenAIDeploymentsList_schema = z.object({ +// object: z.literal('list'), +// data: z.array(_azureOpenAIDeployment_schema), +// }); export function azureParseFromDeploymentsAPI(deploymentsApiResponse: object): AzureOpenAIDeployment[] { - return _azureOpenAIDeploymentsList_schema.parse(deploymentsApiResponse).data; + return z.array(_azureOpenAIDeployment_schema).parse(deploymentsApiResponse); } const _azureDenyListPrefix = [ // unsupported for chat: text embedding models 'text-embedding-', -]; +] as const; export function azureDeploymentFilter({ id }: AzureOpenAIDeployment) { // filter out models that are not chat models @@ -122,3 +126,88 @@ export function azureDeploymentToModelDescription(deployment: AzureOpenAIDeploym ...restOfModelDescription,
}; } + + +function _azureServerSideVars() { + return { + apiKey: env.AZURE_OPENAI_API_KEY || '', + apiEndpoint: env.AZURE_OPENAI_API_ENDPOINT || '', + // 'v1' is the next-gen API, which doesn't have a monthly version string anymore + apiEnableV1: env.AZURE_OPENAI_DISABLE_V1 !== 'true', + // https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key + versionAzureOpenAI: env.AZURE_OPENAI_API_VERSION || '2025-04-01-preview', + // old-school API used to list deployments - still needed for listing models, as even /v1/models would list any model available on azure and not just the deployed ones + versionDeployments: env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview', + } +} + +export function azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues { + + // Server-side configuration, with defaults + const server = _azureServerSideVars(); + + // Client-provided values always take precedence over server env vars + const azureKey = access.oaiKey || server.apiKey || ''; + const azureHostFixed = fixupHost(access.oaiHost || server.apiEndpoint || '', apiPath); + + // Normalize to origin only (discard path/query) to prevent malformed URLs + let azureBase: string; + try { + azureBase = new URL(azureHostFixed).origin; + } catch (e) { + throw new Error(`Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}`); + } + + if (!azureKey || !azureBase) + throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).'); + + /** + * Azure OpenAI API Routing: Convert OpenAI standard paths to Azure-specific paths + * + * Azure supports two API patterns: + * 1. Next-gen v1 API (/openai/v1/...): Direct endpoints without deployment IDs + * - Used for GPT-5-like models with advanced features + * - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true + * 2. 
Traditional deployment-based API (/openai/deployments/{id}/...): Legacy pattern + * - Required for older models and when v1 API is disabled + * - Requires deployment ID for all API calls + */ + switch (true) { + + // List models + case apiPath === '/v1/models': + // uses the good old Azure OpenAI Deployments listing API + apiPath = `/openai/deployments?api-version=${server.versionDeployments}`; + break; + + // Responses API - next-gen v1 API + case apiPath === '/v1/responses' && server.apiEnableV1: + // Next-gen v1 API: direct endpoint without deployment path + apiPath = '/openai/v1/responses'; // NOTE: we seem to not need the api-version query param here + // apiPath = `/openai/v1/responses?api-version=${server.versionResponses}`; + // console.log('[Azure] Using next-gen v1 API for Responses:', apiPath); + break; + + // Chat Completions API, and other v1 APIs + case apiPath === '/v1/chat/completions' || apiPath === '/v1/responses' || apiPath.startsWith('/v1/'): + + // require the model Id for traditional deployment-based routing + if (!modelRefId) + throw new Error('Azure OpenAI API needs a deployment id'); + + const functionName = apiPath.replace('/v1/', ''); // e.g. 
'chat/completions' + apiPath = `/openai/deployments/${modelRefId}/${functionName}?api-version=${server.versionAzureOpenAI}`; + break; + + default: + throw new Error('Azure OpenAI API path not supported: ' + apiPath); + } + + return { + headers: { + 'Content-Type': 'application/json', + 'api-key': azureKey, + }, + url: azureBase + apiPath, + }; +} diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts index 8765fadb8..37a6c6617 100644 --- a/src/modules/llms/server/openai/openai.router.ts +++ b/src/modules/llms/server/openai/openai.router.ts @@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types'; import { alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models'; -import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './models/azure.models'; +import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models'; import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models'; import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models'; import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models'; @@ -163,17 +163,6 @@ export const llmOpenAIRouter = createTRPCRouter({ let models: ModelDescriptionSchema[]; - // [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping - if (access.dialect === 'azure') { - const azureOpenAIDeploymentsResponse = await openaiGETOrThrow(access, `/openai/deployments?api-version=${AZURE_DEPLOYMENTS_API_VERSION}`); - const azureOpenAIDeployments = azureParseFromDeploymentsAPI(azureOpenAIDeploymentsResponse); - models = 
azureOpenAIDeployments - .filter(azureDeploymentFilter) - .map(azureDeploymentToModelDescription) - .sort(openAISortModels); - return { models }; - } - // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards) if (access.dialect === 'perplexity') { models = perplexityAIModelDescriptions() @@ -212,6 +201,14 @@ export const llmOpenAIRouter = createTRPCRouter({ .sort(alibabaModelSort); break; + case 'azure': + const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels); + models = azureOpenAIDeployments + .filter(azureDeploymentFilter) + .map(azureDeploymentToModelDescription) + .sort(openAISortModels); + break; + case 'deepseek': models = openAIModels .filter(({ id }) => deepseekModelFilter(id)) @@ -488,12 +485,6 @@ const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai'; const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz'; const DEFAULT_XAI_HOST = 'https://api.x.ai'; -// Azure API version constants with environment overrides -const AZURE_API_V1_ENABLED = env.AZURE_API_V1 === 'true'; -const AZURE_RESPONSES_API_VERSION = env.AZURE_RESPONSES_API_VERSION || 'preview'; // 'preview' for v1, '2025-04-01-preview' for traditional -const AZURE_CHAT_API_VERSION = env.AZURE_CHAT_API_VERSION || '2025-02-01-preview'; -const AZURE_DEPLOYMENTS_API_VERSION = env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview'; - /** * Get a random key from a comma-separated list of API keys @@ -537,62 +528,9 @@ export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | nu url: alibabaOaiHost + apiPath, }; + case 'azure': - const azureKey = access.oaiKey || env.AZURE_OPENAI_API_KEY || ''; - - // Prefer server env over client-provided host for better reliability - const azureHostRaw = env.AZURE_OPENAI_API_ENDPOINT || access.oaiHost || ''; - const azureHostFixed = fixupHost(azureHostRaw, apiPath); - - // Normalize to origin only (strip any path/query) to prevent malformed URLs - let azureBase: string; - 
try { - const urlObj = new URL(azureHostFixed); - azureBase = urlObj.origin; - } catch (e) { - throw new Error(`Invalid Azure endpoint URL: ${azureHostFixed}`); - } - - if (!azureKey || !azureBase) - throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).'); - - // Determine if we should use next-gen v1 API or traditional deployment-based API - const useV1API = AZURE_API_V1_ENABLED || AZURE_RESPONSES_API_VERSION.toLowerCase() === 'preview'; - - let url = azureBase; - - // Special handling for Responses API which supports both paradigms - if (apiPath === '/v1/responses') { - if (useV1API) { - // Next-gen v1 API: direct endpoint without deployment path - url += `/openai/v1/responses?api-version=${AZURE_RESPONSES_API_VERSION}`; - console.log('[Azure] Using next-gen v1 API for Responses:', url); - } else { - // Traditional API: deployment-based endpoint - if (!modelRefId) - throw new Error('Azure OpenAI API needs a deployment id'); - url += `/openai/deployments/${modelRefId}/responses?api-version=${AZURE_RESPONSES_API_VERSION}`; - console.log('[Azure] Using traditional deployment-based API for Responses:', url); - } - } else if (apiPath.startsWith('/v1/')) { - // Other v1 endpoints use traditional deployment-based routing - if (!modelRefId) - throw new Error('Azure OpenAI API needs a deployment id'); - url += `/openai/deployments/${modelRefId}/${apiPath.replace('/v1/', '')}?api-version=${AZURE_CHAT_API_VERSION}`; - } else if (apiPath.startsWith('/openai/deployments')) { - // Direct deployment paths (e.g., for listing) - url += apiPath; - } else { - throw new Error('Azure OpenAI API path not supported: ' + apiPath); - } - - return { - headers: { - 'Content-Type': 'application/json', - 'api-key': azureKey, - }, - url, - }; + return azureOpenAIAccess(access, modelRefId, apiPath); case 'deepseek':