LLMs: Azure OpenAI: cleaned up (and moved) azureOpenAIAccess - simpler and modularized code

2026-05-10 21:50:14 -07:00 · 2025-09-12 14:00:30 -07:00
parent 39a7e30880
commit aa441b0656
2 changed files with 108 additions and 81 deletions
@@ -1,9 +1,14 @@
 import * as z from 'zod/v4';

+import { env } from '~/server/env';
+
 // import { LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Json, LLM_IF_OAI_Vision } from '~/common/stores/llms/llms.types';
 import { LLM_IF_OAI_Chat } from '~/common/stores/llms/llms.types';

-import type { ModelDescriptionSchema } from '../../llm.server.types';
+import type { ModelDescriptionSchema, RequestAccessValues } from '../../llm.server.types';
+
+import type { OpenAIAccessSchema } from '../openai.router';
+import { fixupHost } from '../openai.router';

 import { fromManualMapping, ManualMappings } from './models.data';
 import { _knownOpenAIChatModels } from './openai.models';
@@ -63,21 +68,20 @@ const _azureOpenAIDeployment_schema = z.object({
 });
 type AzureOpenAIDeployment = z.infer<typeof _azureOpenAIDeployment_schema>;

-const _azureOpenAIDeploymentsList_schema = z.object({
-  object: z.literal('list'),
-  data: z.array(_azureOpenAIDeployment_schema),
-});
-
+// const _azureOpenAIDeploymentsList_schema = z.object({
+//   object: z.literal('list'),
+//   data: z.array(_azureOpenAIDeployment_schema),
+// });

 export function azureParseFromDeploymentsAPI(deploymentsApiResponse: object): AzureOpenAIDeployment[] {
-  return _azureOpenAIDeploymentsList_schema.parse(deploymentsApiResponse).data;
+  return z.array(_azureOpenAIDeployment_schema).parse(deploymentsApiResponse);
 }


 const _azureDenyListPrefix = [
  // unsupported for chat: text embedding models
  'text-embedding-',
-];
+] as const;

 export function azureDeploymentFilter({ id }: AzureOpenAIDeployment) {
  // filter out models that are not chat models
@@ -122,3 +126,88 @@ export function azureDeploymentToModelDescription(deployment: AzureOpenAIDeploym
    ...restOfModelDescription,
  };
 }
+
+
+function _azureServerSideVars() {
+  return {
+    apiKey: env.AZURE_OPENAI_API_KEY || '',
+    apiEndpoint: env.AZURE_OPENAI_API_ENDPOINT || '',
+    // 'v1' is the next-gen API, which doesn't have a monthly version string anymore
+    apiEnableV1: env.AZURE_OPENAI_DISABLE_V1 !== 'true',
+    // https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle?tabs=key
+    versionAzureOpenAI: env.AZURE_OPENAI_API_VERSION || '2025-04-01-preview',
+    // legacy API version used to list deployments - still needed for listing models, because /v1/models would list every model available on Azure, not just the deployed ones
+    versionDeployments: env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview',
+  }
+}
+
+export function azureOpenAIAccess(access: OpenAIAccessSchema, modelRefId: string | null, apiPath: string): RequestAccessValues {
+
+  // Server-side configuration, with defaults
+  const server = _azureServerSideVars();
+
+  // Client-provided values always take precedence over server env vars
+  const azureKey = access.oaiKey || server.apiKey || '';
+  const azureHostFixed = fixupHost(access.oaiHost || server.apiEndpoint || '', apiPath);
+
+  // Normalize to origin only (discard path/query) to prevent malformed URLs
+  let azureBase: string;
+  try {
+    azureBase = new URL(azureHostFixed).origin;
+  } catch (e) {
+    throw new Error(`Azure OpenAI API Host is invalid: ${azureHostFixed || 'missing'}`);
+  }
+
+  if (!azureKey || !azureBase)
+    throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');
+
+  /**
+   * Azure OpenAI API Routing: Convert OpenAI standard paths to Azure-specific paths
+   *
+   * Azure supports two API patterns:
+   * 1. Next-gen v1 API (/openai/v1/...): Direct endpoints without deployment IDs
+   *    - Used for GPT-5-like models with advanced features
+   *    - Enabled by default, can be disabled via AZURE_OPENAI_DISABLE_V1=true
+   * 2. Traditional deployment-based API (/openai/deployments/{id}/...): Legacy pattern
+   *    - Required for older models and when v1 API is disabled
+   *    - Requires deployment ID for all API calls
+   */
+  switch (true) {
+
+    // List models
+    case apiPath === '/v1/models':
+      // uses the good old Azure OpenAI Deployments listing API
+      apiPath = `/openai/deployments?api-version=${server.versionDeployments}`;
+      break;
+
+    // Responses API - next-gen v1 API
+    case apiPath === '/v1/responses' && server.apiEnableV1:
+      // Next-gen v1 API: direct endpoint without deployment path
+      apiPath = '/openai/v1/responses'; // NOTE: we seem to not need the api-version query param here
+      // apiPath = `/openai/v1/responses?api-version=${server.versionResponses}`;
+      // console.log('[Azure] Using next-gen v1 API for Responses:', apiPath);
+      break;
+
+    // Chat Completions API and other v1 APIs (including Responses, when the next-gen v1 API is disabled)
+    case apiPath === '/v1/chat/completions' || apiPath === '/v1/responses' || apiPath.startsWith('/v1/'):
+
+      // require the model Id for traditional deployment-based routing
+      if (!modelRefId)
+        throw new Error('Azure OpenAI API needs a deployment id');
+
+      const functionName = apiPath.replace('/v1/', ''); // e.g. 'chat/completions'
+      apiPath = `/openai/deployments/${modelRefId}/${functionName}?api-version=${server.versionAzureOpenAI}`;
+      break;
+
+    default:
+      throw new Error('Azure OpenAI API path not supported: ' + apiPath);
+  }
+
+  return {
+    headers: {
+      'Content-Type': 'application/json',
+      'api-key': azureKey,
+    },
+    url: azureBase + apiPath,
+  };
+}
@@ -15,7 +15,7 @@ import { OpenAIWire_API_Images_Generations, OpenAIWire_API_Models_List, OpenAIWi

 import { ListModelsResponse_schema, ModelDescriptionSchema, RequestAccessValues } from '../llm.server.types';
 import { alibabaModelSort, alibabaModelToModelDescription } from './models/alibaba.models';
-import { azureDeploymentFilter, azureDeploymentToModelDescription, azureParseFromDeploymentsAPI } from './models/azure.models';
+import { azureDeploymentFilter, azureDeploymentToModelDescription, azureOpenAIAccess, azureParseFromDeploymentsAPI } from './models/azure.models';
 import { chutesAIHeuristic, chutesAIModelsToModelDescriptions } from './models/chutesai.models';
 import { deepseekModelFilter, deepseekModelSort, deepseekModelToModelDescription } from './models/deepseek.models';
 import { fastAPIHeuristic, fastAPIModels } from './models/fastapi.models';
@@ -163,17 +163,6 @@ export const llmOpenAIRouter = createTRPCRouter({

      let models: ModelDescriptionSchema[];

-      // [Azure]: use an older 'deployments' API to enumerate the models, and a modified OpenAI id to description mapping
-      if (access.dialect === 'azure') {
-        const azureOpenAIDeploymentsResponse = await openaiGETOrThrow(access, `/openai/deployments?api-version=${AZURE_DEPLOYMENTS_API_VERSION}`);
-        const azureOpenAIDeployments = azureParseFromDeploymentsAPI(azureOpenAIDeploymentsResponse);
-        models = azureOpenAIDeployments
-          .filter(azureDeploymentFilter)
-          .map(azureDeploymentToModelDescription)
-          .sort(openAISortModels);
-        return { models };
-      }
-
      // [Perplexity]: there's no API for models listing (upstream: https://docs.perplexity.ai/guides/model-cards)
      if (access.dialect === 'perplexity') {
        models = perplexityAIModelDescriptions()
@@ -212,6 +201,14 @@ export const llmOpenAIRouter = createTRPCRouter({
            .sort(alibabaModelSort);
          break;

+        case 'azure':
+          const azureOpenAIDeployments = azureParseFromDeploymentsAPI(openAIModels);
+          models = azureOpenAIDeployments
+            .filter(azureDeploymentFilter)
+            .map(azureDeploymentToModelDescription)
+            .sort(openAISortModels);
+          break;
+
        case 'deepseek':
          models = openAIModels
            .filter(({ id }) => deepseekModelFilter(id))
@@ -488,12 +485,6 @@ const DEFAULT_PERPLEXITY_HOST = 'https://api.perplexity.ai';
 const DEFAULT_TOGETHERAI_HOST = 'https://api.together.xyz';
 const DEFAULT_XAI_HOST = 'https://api.x.ai';

-// Azure API version constants with environment overrides
-const AZURE_API_V1_ENABLED = env.AZURE_API_V1 === 'true';
-const AZURE_RESPONSES_API_VERSION = env.AZURE_RESPONSES_API_VERSION || 'preview'; // 'preview' for v1, '2025-04-01-preview' for traditional
-const AZURE_CHAT_API_VERSION = env.AZURE_CHAT_API_VERSION || '2025-02-01-preview';
-const AZURE_DEPLOYMENTS_API_VERSION = env.AZURE_DEPLOYMENTS_API_VERSION || '2023-03-15-preview';
-

 /**
 * Get a random key from a comma-separated list of API keys
@@ -537,62 +528,9 @@ export function openAIAccess(access: OpenAIAccessSchema, modelRefId: string | nu
        url: alibabaOaiHost + apiPath,
      };

+
    case 'azure':
-      const azureKey = access.oaiKey || env.AZURE_OPENAI_API_KEY || '';
-      
-      // Prefer server env over client-provided host for better reliability
-      const azureHostRaw = env.AZURE_OPENAI_API_ENDPOINT || access.oaiHost || '';
-      const azureHostFixed = fixupHost(azureHostRaw, apiPath);
-      
-      // Normalize to origin only (strip any path/query) to prevent malformed URLs
-      let azureBase: string;
-      try {
-        const urlObj = new URL(azureHostFixed);
-        azureBase = urlObj.origin;
-      } catch (e) {
-        throw new Error(`Invalid Azure endpoint URL: ${azureHostFixed}`);
-      }
-      
-      if (!azureKey || !azureBase)
-        throw new Error('Missing Azure API Key or Host. Add it on the UI (Models Setup) or server side (your deployment).');
-
-      // Determine if we should use next-gen v1 API or traditional deployment-based API
-      const useV1API = AZURE_API_V1_ENABLED || AZURE_RESPONSES_API_VERSION.toLowerCase() === 'preview';
-      
-      let url = azureBase;
-      
-      // Special handling for Responses API which supports both paradigms
-      if (apiPath === '/v1/responses') {
-        if (useV1API) {
-          // Next-gen v1 API: direct endpoint without deployment path
-          url += `/openai/v1/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
-          console.log('[Azure] Using next-gen v1 API for Responses:', url);
-        } else {
-          // Traditional API: deployment-based endpoint
-          if (!modelRefId)
-            throw new Error('Azure OpenAI API needs a deployment id');
-          url += `/openai/deployments/${modelRefId}/responses?api-version=${AZURE_RESPONSES_API_VERSION}`;
-          console.log('[Azure] Using traditional deployment-based API for Responses:', url);
-        }
-      } else if (apiPath.startsWith('/v1/')) {
-        // Other v1 endpoints use traditional deployment-based routing
-        if (!modelRefId)
-          throw new Error('Azure OpenAI API needs a deployment id');
-        url += `/openai/deployments/${modelRefId}/${apiPath.replace('/v1/', '')}?api-version=${AZURE_CHAT_API_VERSION}`;
-      } else if (apiPath.startsWith('/openai/deployments')) {
-        // Direct deployment paths (e.g., for listing)
-        url += apiPath;
-      } else {
-        throw new Error('Azure OpenAI API path not supported: ' + apiPath);
-      }
-
-      return {
-        headers: {
-          'Content-Type': 'application/json',
-          'api-key': azureKey,
-        },
-        url,
-      };
+      return azureOpenAIAccess(access, modelRefId, apiPath);


    case 'deepseek':