LLMs: Azure OpenAI: cleaned up (and moved) azureOpenAIAccess - simpler and modularized code

This commit is contained in:
Enrico Ros
2025-09-12 14:00:30 -07:00
parent 39a7e30880
commit aa441b0656
2 changed files with 108 additions and 81 deletions
@@ -1,9 +1,14 @@
import * as z from 'zod/v4';
import { env } from '~/server/env';
// import { LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
import type { ModelDescriptionSchema } from '../../llm.server.types';
import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.server.types';
import type { OpenAIAccessSchema } from '../openai.router';
import { fixupHost } from '../openai.router';
import { fromManualMapping, ManualMappings } from './models.data';
import { _knownOpenAIChatModels } from './openai.models';
@@ -63,21 +68,20 @@ const _azureOpenAIDeployment_schema = z.object({
});
type AzureOpenAIDeployment = z.infer<typeof _azureOpenAIDeployment_schema>;
const _azureOpenAIDeploymentsList_schema = z.object({
object: z.literal('list'),
data: z.array(_azureOpenAIDeployment_schema),
});
// const _azureOpenAIDeploymentsList_schema = z.object({
// object: z.literal('list'),
// data: z.array(_azureOpenAIDeployment_schema),
// });
export function azureParseFromDeploymentsAPI(deploymentsApiResponse: object): AzureOpenAIDeployment[] {
return _azureOpenAIDeploymentsList_schema.parse(deploymentsApiResponse).data;
return z.array(_azureOpenAIDeployment_schema).parse(deploymentsApiResponse);
}
const _azureDenyListPrefix = [
// unsupported for chat: text embedding models
'text-embedding-',
];
] as const;
export function azureDeploymentFilter({ id }: AzureOpenAIDeployment) {
// filter out models that are not chat models
@@ -122,3 +126,88 @@ export function azureDeploymentToModelDescription(deployment: AzureOpenAIDeploym
...restOfModelDescription,
};
}
/**
 * Collects the Azure OpenAI server-side configuration from environment variables,
 * falling back to sensible defaults where a variable is unset.
 */
function _azureServerSideVars() {
  const apiKey = env.AZURE_OPENAI_API_KEY || '';
  const apiEndpoint = env.AZURE_OPENAI_API_ENDPOINT || '';
  // 'v1' is the next-gen API, which doesn't have a monthly version string anymore
  const apiEnableV1 = env.AZURE_OPENAI_DISABLE_V1 !== 'true';
  // https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key
  const versionAzureOpenAI = env.AZURE_OPENAI_API_VERSION || '2025-04-01-preview';
  // old-school API used to list deployments - still needed for listing models, as even /v1/models would list any model available on azure and not just the deployed ones
  const versionDeployments = env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview';
  return { apiKey, apiEndpoint, apiEnableV1, versionAzureOpenAI, versionDeployments };
}
/**
 * Builds the request access values (target URL + auth headers) for Azure OpenAI,
 * translating OpenAI-standard API paths into Azure-specific routes.
 *
 * Azure supports two API patterns:
 * 1. Next-gen v1 API (/openai/v1/...): Direct endpoints without deployment IDs
 *    - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true
 * 2. Traditional deployment-based API (/openai/deployments/{id}/...): Legacy pattern
 *    - Required for older models and when the v1 API is disabled
 *    - Requires a deployment ID for all API calls
 *
 * @param access client-provided access values; key/host take precedence over server env vars
 * @param modelRefId the Azure deployment id; required for deployment-based routing
 * @param apiPath an OpenAI-standard API path (e.g. '/v1/chat/completions')
 * @returns the headers and full URL to use for the request
 * @throws Error when the host is invalid/missing, the key is missing, a deployment
 *         id is required but absent, or the apiPath is not supported
 */
export function azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues {

  // Server-side configuration, with defaults
  const server = _azureServerSideVars();

  // Client-provided values always take precedence over server env vars
  const azureKey = access.oaiKey || server.apiKey || '';
  const azureHostFixed = fixupHost(access.oaiHost || server.apiEndpoint || '', apiPath);

  // Normalize to origin only (discard path/query) to prevent malformed URLs
  let azureBase: string;
  try {
    azureBase = new URL(azureHostFixed).origin;
  } catch {
    // optional catch binding: the caught error carries no extra signal here
    throw new Error(`Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}`);
  }

  if (!azureKey || !azureBase)
    throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');

  // Azure OpenAI API Routing: convert OpenAI standard paths to Azure-specific paths
  switch (true) {

    // List models: uses the good old Azure OpenAI Deployments listing API, as even
    // /v1/models would list any model available on Azure, not just the deployed ones
    case apiPath === '/v1/models':
      apiPath = `/openai/deployments?api-version=${server.versionDeployments}`;
      break;

    // Responses API - next-gen v1 API: direct endpoint without a deployment path
    case apiPath === '/v1/responses' && server.apiEnableV1:
      apiPath = '/openai/v1/responses'; // NOTE: we seem to not need the api-version query param here
      break;

    // Chat Completions, Responses (when v1 is disabled), and any other v1 API:
    // traditional deployment-based routing. NOTE: startsWith('/v1/') subsumes the
    // former explicit '/v1/chat/completions' and '/v1/responses' equality checks.
    case apiPath.startsWith('/v1/'): {
      // braces scope 'functionName' to this case (avoids no-case-declarations leakage)
      if (!modelRefId)
        throw new Error('Azure OpenAI API needs a deployment id');
      const functionName = apiPath.replace('/v1/', ''); // e.g. 'chat/completions'
      apiPath = `/openai/deployments/${modelRefId}/${functionName}?api-version=${server.versionAzureOpenAI}`;
      break;
    }

    default:
      throw new Error('Azure OpenAI API path not supported: ' + apiPath);
  }

  return {
    headers: {
      'Content-Type': 'application/json',
      'api-key': azureKey, // Azure uses 'api-key', not the OpenAI 'Authorization: Bearer' header
    },
    url: azureBase + apiPath,
  };
}
+11 -73
View File
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
import { alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './models/azure.models';
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
@@ -163,17 +163,6 @@ export const llmOpenAIRouter = createTRPCRouter({
let models: ModelDescriptionSchema[];
// [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
if (access.dialect === 'azure') {
const azureOpenAIDeploymentsResponse = await openaiGETOrThrow(access, `/openai/deployments?api-version=${AZURE_DEPLOYMENTS_API_VERSION}`);
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(azureOpenAIDeploymentsResponse);
models = azureOpenAIDeployments
.filter(azureDeploymentFilter)
.map(azureDeploymentToModelDescription)
.sort(openAISortModels);
return { models };
}
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards)
if (access.dialect === 'perplexity') {
models = perplexityAIModelDescriptions()
@@ -212,6 +201,14 @@ export const llmOpenAIRouter = createTRPCRouter({
.sort(alibabaModelSort);
break;
case 'azure':
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels);
models = azureOpenAIDeployments
.filter(azureDeploymentFilter)
.map(azureDeploymentToModelDescription)
.sort(openAISortModels);
break;
case 'deepseek':
models = openAIModels
.filter(({ id }) => deepseekModelFilter(id))
@@ -488,12 +485,6 @@ const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai';
const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz';
const DEFAULT_XAI_HOST = 'https://api.x.ai';
// Azure API version constants with environment overrides
const AZURE_API_V1_ENABLED = env.AZURE_API_V1 === 'true';
const AZURE_RESPONSES_API_VERSION = env.AZURE_RESPONSES_API_VERSION || 'preview'; // 'preview' for v1, '2025-04-01-preview' for traditional
const AZURE_CHAT_API_VERSION = env.AZURE_CHAT_API_VERSION || '2025-02-01-preview';
const AZURE_DEPLOYMENTS_API_VERSION = env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview';
/**
* Get a random key from a comma-separated list of API keys
@@ -537,62 +528,9 @@ export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | nu
url: alibabaOaiHost + apiPath,
};
case 'azure':
const azureKey = access.oaiKey || env.AZURE_OPENAI_API_KEY || '';
// Prefer server env over client-provided host for better reliability
const azureHostRaw = env.AZURE_OPENAI_API_ENDPOINT || access.oaiHost || '';
const azureHostFixed = fixupHost(azureHostRaw, apiPath);
// Normalize to origin only (strip any path/query) to prevent malformed URLs
let azureBase: string;
try {
const urlObj = new URL(azureHostFixed);
azureBase = urlObj.origin;
} catch (e) {
throw new Error(`Invalid Azure endpoint URL: ${azureHostFixed}`);
}
if (!azureKey || !azureBase)
throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');
// Determine if we should use next-gen v1 API or traditional deployment-based API
const useV1API = AZURE_API_V1_ENABLED || AZURE_RESPONSES_API_VERSION.toLowerCase() === 'preview';
let url = azureBase;
// Special handling for Responses API which supports both paradigms
if (apiPath === '/v1/responses') {
if (useV1API) {
// Next-gen v1 API: direct endpoint without deployment path
url += `/openai/v1/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
console.log('[Azure] Using next-gen v1 API for Responses:', url);
} else {
// Traditional API: deployment-based endpoint
if (!modelRefId)
throw new Error('Azure OpenAI API needs a deployment id');
url += `/openai/deployments/${modelRefId}/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
console.log('[Azure] Using traditional deployment-based API for Responses:', url);
}
} else if (apiPath.startsWith('/v1/')) {
// Other v1 endpoints use traditional deployment-based routing
if (!modelRefId)
throw new Error('Azure OpenAI API needs a deployment id');
url += `/openai/deployments/${modelRefId}/${apiPath.replace('/v1/', '')}?api-version=${AZURE_CHAT_API_VERSION}`;
} else if (apiPath.startsWith('/openai/deployments')) {
// Direct deployment paths (e.g., for listing)
url += apiPath;
} else {
throw new Error('Azure OpenAI API path not supported: ' + apiPath);
}
return {
headers: {
'Content-Type': 'application/json',
'api-key': azureKey,
},
url,
};
return azureOpenAIAccess(access, modelRefId, apiPath);
case 'deepseek':