mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
LLMs: Azure OpenAI: cleaned up (and moved) azureOpenAIAccess - simpler and modularized code
This commit is contained in:
@@ -1,9 +1,14 @@
|
||||
import * as z from 'zod/v4';
|
||||
|
||||
import { env } from '~/server/env';
|
||||
|
||||
// import { LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
|
||||
import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';
|
||||
|
||||
import type { ModelDescriptionSchema } from '../../llm.server.types';
|
||||
import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.server.types';
|
||||
|
||||
import type { OpenAIAccessSchema } from '../openai.router';
|
||||
import { fixupHost } from '../openai.router';
|
||||
|
||||
import { fromManualMapping, ManualMappings } from './models.data';
|
||||
import { _knownOpenAIChatModels } from './openai.models';
|
||||
@@ -63,21 +68,20 @@ const _azureOpenAIDeployment_schema = z.object({
|
||||
});
|
||||
type AzureOpenAIDeployment = z.infer<typeof _azureOpenAIDeployment_schema>;
|
||||
|
||||
const _azureOpenAIDeploymentsList_schema = z.object({
|
||||
object: z.literal('list'),
|
||||
data: z.array(_azureOpenAIDeployment_schema),
|
||||
});
|
||||
|
||||
// const _azureOpenAIDeploymentsList_schema = z.object({
|
||||
// object: z.literal('list'),
|
||||
// data: z.array(_azureOpenAIDeployment_schema),
|
||||
// });
|
||||
|
||||
export function azureParseFromDeploymentsAPI(deploymentsApiResponse: object): AzureOpenAIDeployment[] {
|
||||
return _azureOpenAIDeploymentsList_schema.parse(deploymentsApiResponse).data;
|
||||
return z.array(_azureOpenAIDeployment_schema).parse(deploymentsApiResponse);
|
||||
}
|
||||
|
||||
|
||||
const _azureDenyListPrefix = [
|
||||
// unsupported for chat: text embedding models
|
||||
'text-embedding-',
|
||||
];
|
||||
] as const;
|
||||
|
||||
export function azureDeploymentFilter({ id }: AzureOpenAIDeployment) {
|
||||
// filter out models that are not chat models
|
||||
@@ -122,3 +126,88 @@ export function azureDeploymentToModelDescription(deployment: AzureOpenAIDeploym
|
||||
...restOfModelDescription,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reads the Azure OpenAI settings from the server environment and applies a default
 * to every value that is unset (or empty).
 *
 * @returns the API key and endpoint (empty strings when not configured), whether the
 *          next-gen 'v1' API is enabled, and the two API version strings.
 */
function _azureServerSideVars() {
  const {
    AZURE_OPENAI_API_KEY: configuredKey,
    AZURE_OPENAI_API_ENDPOINT: configuredEndpoint,
    AZURE_OPENAI_DISABLE_V1: disableV1Flag,
    AZURE_OPENAI_API_VERSION: configuredApiVersion,
    AZURE_DEPLOYMENTS_API_VERSION: configuredDeploymentsVersion,
  } = env;

  // The next-gen 'v1' API no longer carries a monthly version string; it stays on
  // unless the flag is exactly the string 'true'.
  const apiEnableV1 = disableV1Flag !== 'true';

  // Version lifecycle: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key
  const versionAzureOpenAI = configuredApiVersion || '2025-04-01-preview';

  // The older deployments API is still used to list models: /v1/models would list every
  // model available on Azure, not just the deployed ones.
  const versionDeployments = configuredDeploymentsVersion || '2023-03-15-preview';

  return {
    apiKey: configuredKey || '',
    apiEndpoint: configuredEndpoint || '',
    apiEnableV1,
    versionAzureOpenAI,
    versionDeployments,
  };
}
|
||||
|
||||
/**
 * Builds the request headers and fully-qualified URL for a call to Azure OpenAI.
 *
 * Client-provided values (`access.oaiKey`, `access.oaiHost`) take precedence over the
 * server-side environment configuration.
 *
 * Azure OpenAI API Routing: OpenAI standard paths are converted to Azure-specific paths.
 * Azure supports two API patterns:
 * 1. Next-gen v1 API (/openai/v1/...): direct endpoints without deployment IDs
 *    - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true
 *    - Only used here for '/v1/responses'
 * 2. Traditional deployment-based API (/openai/deployments/{id}/...): legacy pattern
 *    - Used for every other '/v1/...' path, and for '/v1/responses' when v1 is disabled
 *    - Requires a deployment ID for all API calls
 * '/v1/models' is special-cased to the deployments listing API.
 *
 * @param access client access configuration; only `oaiKey` and `oaiHost` are read here
 * @param modelRefId Azure deployment id; required for deployment-based routes, unused otherwise
 * @param apiPath OpenAI-style API path, e.g. '/v1/chat/completions'
 * @returns the headers (JSON content type and 'api-key') and the request URL
 * @throws Error when the key or host is missing, the host cannot be parsed as a URL,
 *         a deployment id is required but absent, or the path is not supported
 */
export function azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues {

  // Server-side configuration, with defaults
  const server = _azureServerSideVars();

  // Client-provided values always take precedence over server env vars
  const azureKey = access.oaiKey || server.apiKey || '';
  const azureHostRaw = access.oaiHost || server.apiEndpoint || '';

  // Validate presence on the raw values, before any normalization: once the host has gone
  // through fixupHost/URL parsing it is never an empty string, so a later check could not
  // tell 'missing' apart from 'invalid'.
  if (!azureKey || !azureHostRaw)
    throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');

  // Normalize to origin only (discard path/query) to prevent malformed URLs
  const azureHostFixed = fixupHost(azureHostRaw, apiPath);
  let azureBase: string;
  try {
    azureBase = new URL(azureHostFixed).origin;
  } catch {
    throw new Error(`Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}`);
  }

  // Translate the OpenAI-style path into the Azure path (the parameter is left untouched)
  const openAIPrefix = '/v1/';
  let azurePath: string;

  if (apiPath === '/v1/models') {

    // List models: uses the good old Azure OpenAI Deployments listing API
    azurePath = `/openai/deployments?api-version=${server.versionDeployments}`;

  } else if (apiPath === '/v1/responses' && server.apiEnableV1) {

    // Responses API on the next-gen v1 API: direct endpoint without deployment path
    // NOTE: we seem to not need the api-version query param here
    azurePath = '/openai/v1/responses';

  } else if (apiPath.startsWith(openAIPrefix)) {

    // Chat Completions API and other v1 APIs (including Responses when v1 is disabled):
    // traditional deployment-based routing, which requires the deployment id
    if (!modelRefId)
      throw new Error('Azure OpenAI API needs a deployment id');

    const functionName = apiPath.slice(openAIPrefix.length); // e.g. 'chat/completions'

    // The deployment id is supplied by the caller: encode it so that characters such as
    // '/', '?' or '#' cannot alter the path or query of the outgoing request.
    azurePath = `/openai/deployments/${encodeURIComponent(modelRefId)}/${functionName}?api-version=${server.versionAzureOpenAI}`;

  } else {
    throw new Error('Azure OpenAI API path not supported: ' + apiPath);
  }

  return {
    headers: {
      'Content-Type': 'application/json',
      'api-key': azureKey,
    },
    url: azureBase + azurePath,
  };
}
|
||||
|
||||
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi
|
||||
|
||||
import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
|
||||
import { alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
|
||||
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './models/azure.models';
|
||||
import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
|
||||
import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
|
||||
import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
|
||||
import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
|
||||
@@ -163,17 +163,6 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
|
||||
let models: ModelDescriptionSchema[];
|
||||
|
||||
// [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
|
||||
if (access.dialect === 'azure') {
|
||||
const azureOpenAIDeploymentsResponse = await openaiGETOrThrow(access, `/openai/deployments?api-version=${AZURE_DEPLOYMENTS_API_VERSION}`);
|
||||
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(azureOpenAIDeploymentsResponse);
|
||||
models = azureOpenAIDeployments
|
||||
.filter(azureDeploymentFilter)
|
||||
.map(azureDeploymentToModelDescription)
|
||||
.sort(openAISortModels);
|
||||
return { models };
|
||||
}
|
||||
|
||||
// [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards)
|
||||
if (access.dialect === 'perplexity') {
|
||||
models = perplexityAIModelDescriptions()
|
||||
@@ -212,6 +201,14 @@ export const llmOpenAIRouter = createTRPCRouter({
|
||||
.sort(alibabaModelSort);
|
||||
break;
|
||||
|
||||
case 'azure':
|
||||
const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels);
|
||||
models = azureOpenAIDeployments
|
||||
.filter(azureDeploymentFilter)
|
||||
.map(azureDeploymentToModelDescription)
|
||||
.sort(openAISortModels);
|
||||
break;
|
||||
|
||||
case 'deepseek':
|
||||
models = openAIModels
|
||||
.filter(({ id }) => deepseekModelFilter(id))
|
||||
@@ -488,12 +485,6 @@ const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai';
|
||||
const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz';
|
||||
const DEFAULT_XAI_HOST = 'https://api.x.ai';
|
||||
|
||||
// Azure API version constants with environment overrides
|
||||
const AZURE_API_V1_ENABLED = env.AZURE_API_V1 === 'true';
|
||||
const AZURE_RESPONSES_API_VERSION = env.AZURE_RESPONSES_API_VERSION || 'preview'; // 'preview' for v1, '2025-04-01-preview' for traditional
|
||||
const AZURE_CHAT_API_VERSION = env.AZURE_CHAT_API_VERSION || '2025-02-01-preview';
|
||||
const AZURE_DEPLOYMENTS_API_VERSION = env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview';
|
||||
|
||||
|
||||
/**
|
||||
* Get a random key from a comma-separated list of API keys
|
||||
@@ -537,62 +528,9 @@ export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | nu
|
||||
url: alibabaOaiHost + apiPath,
|
||||
};
|
||||
|
||||
|
||||
case 'azure':
|
||||
const azureKey = access.oaiKey || env.AZURE_OPENAI_API_KEY || '';
|
||||
|
||||
// Prefer server env over client-provided host for better reliability
|
||||
const azureHostRaw = env.AZURE_OPENAI_API_ENDPOINT || access.oaiHost || '';
|
||||
const azureHostFixed = fixupHost(azureHostRaw, apiPath);
|
||||
|
||||
// Normalize to origin only (strip any path/query) to prevent malformed URLs
|
||||
let azureBase: string;
|
||||
try {
|
||||
const urlObj = new URL(azureHostFixed);
|
||||
azureBase = urlObj.origin;
|
||||
} catch (e) {
|
||||
throw new Error(`Invalid Azure endpoint URL: ${azureHostFixed}`);
|
||||
}
|
||||
|
||||
if (!azureKey || !azureBase)
|
||||
throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');
|
||||
|
||||
// Determine if we should use next-gen v1 API or traditional deployment-based API
|
||||
const useV1API = AZURE_API_V1_ENABLED || AZURE_RESPONSES_API_VERSION.toLowerCase() === 'preview';
|
||||
|
||||
let url = azureBase;
|
||||
|
||||
// Special handling for Responses API which supports both paradigms
|
||||
if (apiPath === '/v1/responses') {
|
||||
if (useV1API) {
|
||||
// Next-gen v1 API: direct endpoint without deployment path
|
||||
url += `/openai/v1/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
|
||||
console.log('[Azure] Using next-gen v1 API for Responses:', url);
|
||||
} else {
|
||||
// Traditional API: deployment-based endpoint
|
||||
if (!modelRefId)
|
||||
throw new Error('Azure OpenAI API needs a deployment id');
|
||||
url += `/openai/deployments/${modelRefId}/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
|
||||
console.log('[Azure] Using traditional deployment-based API for Responses:', url);
|
||||
}
|
||||
} else if (apiPath.startsWith('/v1/')) {
|
||||
// Other v1 endpoints use traditional deployment-based routing
|
||||
if (!modelRefId)
|
||||
throw new Error('Azure OpenAI API needs a deployment id');
|
||||
url += `/openai/deployments/${modelRefId}/${apiPath.replace('/v1/', '')}?api-version=${AZURE_CHAT_API_VERSION}`;
|
||||
} else if (apiPath.startsWith('/openai/deployments')) {
|
||||
// Direct deployment paths (e.g., for listing)
|
||||
url += apiPath;
|
||||
} else {
|
||||
throw new Error('Azure OpenAI API path not supported: ' + apiPath);
|
||||
}
|
||||
|
||||
return {
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'api-key': azureKey,
|
||||
},
|
||||
url,
|
||||
};
|
||||
return azureOpenAIAccess(access, modelRefId, apiPath);
|
||||
|
||||
|
||||
case 'deepseek':
|
||||
|
||||
Reference in New Issue
Block a user