Compare commits

...

1 Commit

Author SHA1 Message Date
claude[bot] 04a83247ee feat: Implement TTS vendor abstraction system
Adds support for multiple TTS providers (OpenAI, ElevenLabs) with vendor abstraction pattern similar to LLM vendors.

Core changes:
- Created /src/modules/tts/ module with vendor abstraction
- Implemented ITTSVendor interface for unified TTS API
- Added vendor implementations for ElevenLabs and OpenAI TTS
- Created store-tts.ts for service and voice configuration
- Implemented unified tts.client.ts for vendor-agnostic speech
- Added OpenAI TTS tRPC router with streaming support
- Updated PersonaChatMessageSpeak to use new TTS client
- Added migration logic for existing ElevenLabs configs
- Updated data.ts to support new voice configuration format

Technical details:
- Service-scoped pattern: activeServiceId + activeVoiceId
- Backward compatible with existing elevenLabs voice configs
- Auto-import capability from LLM configurations
- Supports streaming and non-streaming TTS
- Vendor-specific features handled gracefully

Relates to #858

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Enrico Ros <enricoros@users.noreply.github.com>
2025-10-29 13:31:32 +00:00
12 changed files with 913 additions and 3 deletions
@@ -1,4 +1,4 @@
import { elevenLabsSpeakText } from '~/modules/elevenlabs/elevenlabs.client';
import { speakText } from '~/modules/tts/tts.client';
import { isTextContentFragment } from '~/common/stores/chat/chat.fragments';
@@ -59,6 +59,6 @@ export class PersonaChatMessageSpeak implements PersonaProcessorInterface {
console.log('📢 TTS:', text);
this.spokenLine = true;
// fire/forget: we don't want to stall this loop
void elevenLabsSpeakText(text, undefined, false, true);
void speakText(text, { streaming: false, turbo: true });
}
}
+5 -1
View File
@@ -14,7 +14,11 @@ export type SystemPurposeData = {
examples?: SystemPurposeExample[];
highlighted?: boolean;
call?: { starters?: string[] };
voices?: { elevenLabs?: { voiceId: string } };
voices?: {
tts?: { voiceId?: string };
// Legacy support for existing configs
elevenLabs?: { voiceId: string };
};
};
export type SystemPurposeExample = string | { prompt: string, action?: 'require-data-attachment' };
+57
View File
@@ -0,0 +1,57 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import type { DTTSService, TTSGenerationOptions, TTSSpeakResult, TTSVendorId, TTSVoice } from './tts.types';
/**
 * TTS Vendor Interface - abstraction for all TTS providers.
 * Similar to IModelVendor but adapted for TTS services.
 *
 * A vendor is a stateless singleton describing one provider (ElevenLabs,
 * OpenAI, ...); per-user configuration lives in a DTTSService whose `setup`
 * is typed by TServiceSettings, and is converted to a transport-level
 * TAccess object before RPC calls.
 */
export interface ITTSVendor<TServiceSettings extends Record<string, any> = {}, TAccess = unknown> {
  readonly id: TTSVendorId;
  readonly name: string;
  readonly displayRank: number; // Display order in UI (ascending)
  readonly location: 'local' | 'cloud';
  readonly brandColor?: string;

  // Server configuration detection: which backend capability flag signals
  // that the server already holds credentials for this vendor
  readonly hasServerConfigKey?: keyof BackendCapabilities;

  // Capability flags, used by the UI to show/hide vendor-specific controls
  readonly capabilities: {
    streaming: boolean;
    voiceCloning?: boolean;
    speedControl?: boolean;
    listVoices: boolean;
  };

  /// Abstraction interface ///

  /**
   * Initialize default settings for a new service
   */
  initializeSetup?(): TServiceSettings;

  /**
   * Validate service setup (client-side); absent means "always valid"
   */
  validateSetup?(setup: TServiceSettings): boolean;

  /**
   * Get transport access configuration from setup
   */
  getTransportAccess(setup?: Partial<TServiceSettings>): TAccess;

  /**
   * RPC: Speak text using this vendor's TTS service.
   * Resolves to an async iterable of protocol "pieces" (control/audio/
   * audioChunk/error/warning objects) consumed by tts.client.
   */
  rpcSpeak(
    access: TAccess,
    options: TTSGenerationOptions,
  ): Promise<AsyncIterable<any>>;

  /**
   * RPC: List available voices (if supported — see capabilities.listVoices)
   */
  rpcListVoices?(access: TAccess): Promise<{ voices: TTSVoice[] }>;
}
+195
View File
@@ -0,0 +1,195 @@
import * as z from 'zod/v4';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
// Configuration
const SAFETY_TEXT_LENGTH = 4096; // OpenAI input limit (characters); longer text is truncated with a warning
const MIN_CHUNK_SIZE = 4096; // Minimum chunk size in bytes accumulated before a streaming yield

// Schema definitions

/**
 * Input for the `speech` procedure.
 * `access` optionally overrides the server's environment credentials.
 */
export const openaiTTSSpeechInputSchema = z.object({
  access: z.object({
    oaiKey: z.string().optional(),
    oaiHost: z.string().optional(),
    oaiOrgId: z.string().optional(),
  }),
  text: z.string(),
  voice: z.enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']).default('alloy'),
  model: z.enum(['tts-1', 'tts-1-hd']).default('tts-1'),
  speed: z.number().min(0.25).max(4.0).optional(), // OpenAI accepts 0.25 - 4.0
  format: z.enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm']).optional(),
  streaming: z.boolean().default(false),
});

export type OpenAITTSSpeechInputSchema = z.infer<typeof openaiTTSSpeechInputSchema>;
export const openaiTTSRouter = createTRPCRouter({

  /**
   * Speech synthesis procedure using the OpenAI TTS API.
   *
   * Async-generator mutation that yields a sequence of "pieces":
   *   { control: 'start' } → optional { warningMessage } → either a single
   *   { audio: { base64, contentType } } (non-streaming) or repeated
   *   { audioChunk: { base64 } } (streaming) → { control: 'end' }.
   * An { errorMessage } piece terminates the sequence early.
   */
  speech: publicProcedure
    .input(openaiTTSSpeechInputSchema)
    .mutation(async function* ({ input, ctx }) {

      // Signal stream start to the client
      yield { control: 'start' };

      let text = input.text;
      // Safety check: hard-truncate text beyond the OpenAI input limit
      if (text.length > SAFETY_TEXT_LENGTH) {
        text = text.slice(0, SAFETY_TEXT_LENGTH);
        yield { warningMessage: 'text was truncated to maximum length' };
      }

      let response: Response;
      try {
        // Prepare the upstream request
        const { headers, url } = openaiTTSAccess(input.access);
        const body: OpenAITTSWire.TTSRequest = {
          input: text,
          voice: input.voice,
          model: input.model,
          response_format: input.format || 'mp3',
          // a falsy speed is omitted (schema enforces 0.25-4.0 anyway)
          ...(input.speed ? { speed: input.speed } : {}),
        };
        // Blocking fetch; aborts together with the incoming request via ctx.reqSignal
        response = await fetchResponseOrTRPCThrow({
          url,
          method: 'POST',
          headers,
          body,
          signal: ctx.reqSignal,
          name: 'OpenAI TTS',
        });
      } catch (error: any) {
        yield { errorMessage: `fetch issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // If not streaming, buffer and return the entire audio in one piece
      if (!input.streaming) {
        const audioArrayBuffer = await response.arrayBuffer();
        yield {
          audio: {
            base64: Buffer.from(audioArrayBuffer).toString('base64'),
            contentType: response.headers.get('content-type') || 'audio/mpeg',
          },
        };
        yield { control: 'end' };
        return;
      }

      const reader = response.body?.getReader();
      if (!reader) {
        yield { errorMessage: 'stream issue: No reader' };
        return;
      }

      // STREAM the audio chunks back to the client
      try {
        // Accumulate upstream chunks until MIN_CHUNK_SIZE, to avoid yielding many tiny pieces
        const accumulatedChunks: Uint8Array[] = [];
        let accumulatedSize = 0;

        // Read loop
        while (true) {
          const { value, done: readerDone } = await reader.read();
          if (readerDone) break;
          if (!value) continue;

          // Accumulate chunks
          accumulatedChunks.push(value);
          accumulatedSize += value.length;

          // When accumulated size reaches or exceeds MIN_CHUNK_SIZE, yield the chunk
          if (accumulatedSize >= MIN_CHUNK_SIZE) {
            yield {
              audioChunk: {
                base64: Buffer.concat(accumulatedChunks).toString('base64'),
              },
            };
            // Reset the accumulation
            accumulatedChunks.length = 0;
            accumulatedSize = 0;
          }
        }

        // Flush any remaining tail (smaller than MIN_CHUNK_SIZE)
        if (accumulatedSize) {
          yield {
            audioChunk: {
              base64: Buffer.concat(accumulatedChunks).toString('base64'),
            },
          };
        }
      } catch (error: any) {
        yield { errorMessage: `stream issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // End streaming
      yield { control: 'end' };
    }),

});
/**
 * Helper function to construct OpenAI TTS API access details.
 *
 * Resolves credentials (client-provided values win over server env vars),
 * normalizes the host (adds https://, strips trailing slashes), and builds
 * the request headers.
 *
 * @param access optional client-side overrides for key/host/org
 * @returns headers and the full `/v1/audio/speech` endpoint URL
 * @throws Error when no API key is available from either source
 */
export function openaiTTSAccess(access: OpenAITTSSpeechInputSchema['access']): { headers: Record<string, string>; url: string } {
  // API key: client-provided value wins over the server environment
  const apiKey = (access.oaiKey || env.OPENAI_API_KEY || '').trim();
  if (!apiKey) {
    throw new Error('Missing OpenAI API key.');
  }

  // API host: default to api.openai.com; ensure a scheme is present
  let host = (access.oaiHost || env.OPENAI_API_HOST || 'api.openai.com').trim();
  if (!host.startsWith('http')) {
    host = `https://${host}`;
  }
  // strip ALL trailing slashes (the previous single-slice left 'host//' inputs broken)
  host = host.replace(/\/+$/, '');

  // Build headers — typed as Record<string, string> (not HeadersInit) so the
  // conditional org header below type-checks under strict mode
  const headers: Record<string, string> = {
    'Accept': 'audio/*',
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiKey}`,
  };

  // Add org ID if provided
  if (access.oaiOrgId) {
    headers['OpenAI-Organization'] = access.oaiOrgId;
  }

  return {
    headers,
    url: `${host}/v1/audio/speech`,
  };
}
/// OpenAI TTS API Wire Types

export namespace OpenAITTSWire {

  /** Request body for `POST /v1/audio/speech` (upstream OpenAI API). */
  export interface TTSRequest {
    input: string; // text to synthesize (truncated to SAFETY_TEXT_LENGTH before sending)
    voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
    model: 'tts-1' | 'tts-1-hd';
    response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
    speed?: number; // 0.25 to 4.0
  }
}
+110
View File
@@ -0,0 +1,110 @@
//
// WARNING: Everything here is data at rest. Know what you're doing.
//
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
import type { ITTSVendor } from './ITTSVendor';
import type { DTTSService, TTSServiceId, TTSVendorId } from './tts.types';
/// TTSStore - a store for configured TTS services and settings

export interface TTSStoreState {
  // TTS services (configured instances of TTS vendors)
  services: DTTSService<any>[];
  // Global active service and voice (null = nothing selected)
  activeServiceId: TTSServiceId | null;
  activeVoiceId: string | null;
}

interface TTSStoreActions {
  // Service management
  createService: (vendor: ITTSVendor) => DTTSService;
  removeService: (id: TTSServiceId) => void;
  updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) => void;
  // Active selection
  setActiveServiceId: (id: TTSServiceId | null) => void;
  setActiveVoiceId: (voiceId: string | null) => void;
}

type TTSStore = TTSStoreState & TTSStoreActions;
export const useTTSStore = create<TTSStore>()(persist(
(set, get) => ({
// Initial state
services: [],
activeServiceId: null,
activeVoiceId: null,
// Actions
createService: (vendor: ITTSVendor) => {
const service: DTTSService = {
id: `${vendor.id}-${Date.now()}`,
label: vendor.name,
vId: vendor.id,
setup: vendor.initializeSetup?.() || {},
};
set(state => ({
services: [...state.services, service],
}));
return service;
},
removeService: (id: TTSServiceId) =>
set(state => {
const newServices = state.services.filter(s => s.id !== id);
return {
services: newServices,
// Clear active service if it was removed
activeServiceId: state.activeServiceId === id ? null : state.activeServiceId,
};
}),
updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) =>
set(state => ({
services: state.services.map(service =>
service.id === id
? { ...service, setup: { ...service.setup, ...partialSettings } }
: service,
),
})),
setActiveServiceId: (id: TTSServiceId | null) =>
set({ activeServiceId: id }),
setActiveVoiceId: (voiceId: string | null) =>
set({ activeVoiceId: voiceId }),
}),
{
name: 'app-tts',
}),
));
// Convenience accessors for non-React (imperative) callers

/** Snapshot of the current TTS store state. */
export function getTTSStoreState(): TTSStoreState {
  return useTTSStore.getState();
}

/** Look up a configured service by id; null when unknown. */
export function getTTSService(serviceId: TTSServiceId): DTTSService | null {
  return useTTSStore.getState().services.find(service => service.id === serviceId) ?? null;
}

/** The globally selected service, or null when none is active. */
export function getActiveTTSService(): DTTSService | null {
  const { activeServiceId } = useTTSStore.getState();
  return activeServiceId ? getTTSService(activeServiceId) : null;
}
+195
View File
@@ -0,0 +1,195 @@
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { convert_Base64_To_UInt8Array } from '~/common/util/blobUtils';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
import { SystemPurposes, type SystemPurposeId } from '~/data';
import { findTTSVendor } from './vendors.registry';
import { getActiveTTSService, getTTSService, useTTSStore } from './store-tts.ts';
import type { TTSGenerationOptions, TTSSpeakResult, TTSServiceId } from './tts.types';
/**
 * Resolves a persona's preferred TTS voice, if any.
 * The new `voices.tts` field takes precedence; the legacy
 * `voices.elevenLabs` field is honored for backward compatibility.
 */
function getPersonaTTSConfig(personaId?: SystemPurposeId): { serviceId?: TTSServiceId; voiceId?: string } | null {
  const voices = personaId ? SystemPurposes[personaId]?.voices : undefined;
  if (!voices)
    return null;
  // new format first, then the legacy ElevenLabs entry
  const voiceId = voices.tts?.voiceId || voices.elevenLabs?.voiceId;
  return voiceId ? { voiceId } : null;
}
/**
 * Main TTS invocation function - vendor-agnostic.
 * Speaks text using the configured TTS service.
 *
 * Resolution order for service/voice: explicit options → persona
 * preference → global store defaults.
 * Never throws: all failures resolve to { success: false }.
 */
export async function speakText(
  text: string,
  options?: {
    serviceId?: TTSServiceId; // Override global service
    voiceId?: string; // Override global voice
    personaId?: SystemPurposeId; // Use persona's voice preference
    streaming?: boolean;
    turbo?: boolean;
    speed?: number;
  },
): Promise<TTSSpeakResult> {

  // Early validation: nothing to speak
  if (!text?.trim()) {
    return { success: false };
  }

  // 1. Resolve service
  // NOTE(review): `services` is destructured but unused here — lookups go through getTTSService below
  const { services, activeServiceId, activeVoiceId } = useTTSStore.getState();

  let serviceId = options?.serviceId;
  let voiceId = options?.voiceId;

  // Persona configuration fills in whatever the explicit options left unset
  if (options?.personaId) {
    const personaConfig = getPersonaTTSConfig(options.personaId);
    if (personaConfig) {
      serviceId = personaConfig.serviceId || serviceId;
      voiceId = personaConfig.voiceId || voiceId;
    }
  }

  // Fall back to global defaults
  serviceId = serviceId || activeServiceId || undefined;
  voiceId = voiceId || activeVoiceId || undefined;

  if (!serviceId) {
    console.warn('TTS: No service configured');
    return { success: false };
  }

  const service = getTTSService(serviceId);
  if (!service) {
    console.warn('TTS: Service not found:', serviceId);
    return { success: false };
  }

  // 2. Get vendor implementation
  const vendor = findTTSVendor(service.vId);
  if (!vendor) {
    console.warn('TTS: Vendor not found:', service.vId);
    return { success: false };
  }

  // 3. Get transport access
  const access = vendor.getTransportAccess(service.setup);

  // 4. Prepare generation options
  // non-English UI language hints multilingual models (vendor-specific handling)
  const { preferredLanguage } = useUIPreferencesStore.getState();
  const nonEnglish = !(preferredLanguage?.toLowerCase()?.startsWith('en'));

  const generationOptions: TTSGenerationOptions = {
    text,
    voiceId,
    streaming: options?.streaming ?? false,
    turbo: options?.turbo ?? false,
    speed: options?.speed,
    nonEnglish,
  };

  // 5. Execute TTS: consume the vendor's piece stream, playing audio as it arrives
  try {
    const stream = await vendor.rpcSpeak(access, generationOptions);

    let liveAudioPlayer: AudioLivePlayer | undefined;
    let playbackStarted = false;
    let audioBase64: string | undefined;

    for await (const piece of stream) {

      // Streaming audio chunk: lazily create the live player, then enqueue
      if (piece.audioChunk) {
        try {
          if (!liveAudioPlayer) {
            liveAudioPlayer = new AudioLivePlayer();
          }
          const chunkArray = convert_Base64_To_UInt8Array(piece.audioChunk.base64, 'tts.client (chunk)');
          // NOTE(review): passes the backing .buffer — assumes the converter returns a zero-offset view; verify
          liveAudioPlayer.enqueueChunk(chunkArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio chunk error:', audioError);
          return { success: false };
        }
      }
      // Full audio buffer (non-streaming): play once, keep base64 for the caller
      else if (piece.audio) {
        try {
          if (!options?.streaming) {
            audioBase64 = piece.audio.base64;
          }
          const audioArray = convert_Base64_To_UInt8Array(piece.audio.base64, 'tts.client');
          void AudioPlayer.playBuffer(audioArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio buffer error:', audioError);
          return { success: false };
        }
      }
      // Errors terminate; warnings are logged and processing continues
      else if (piece.errorMessage) {
        console.error('TTS error:', piece.errorMessage);
        return { success: false, error: piece.errorMessage };
      } else if (piece.warningMessage) {
        console.warn('TTS warning:', piece.warningMessage);
      } else if (piece.control === 'start' || piece.control === 'end') {
        // Control messages - continue processing
      }
    }

    return { success: playbackStarted, audioBase64 };
  } catch (error) {
    console.error('TTS playback error:', error);
    return { success: false, error: String(error) };
  }
}
/**
 * Check if TTS is available and configured.
 * True when the active client-side service passes vendor validation,
 * or when the backend exposes a server-side ElevenLabs key.
 */
export function isTTSAvailable(): boolean {
  const { services, activeServiceId } = useTTSStore.getState();

  // active client-configured service wins, unless its vendor explicitly rejects the setup
  const activeService = activeServiceId ? services.find(s => s.id === activeServiceId) : undefined;
  if (activeService) {
    const vendor = findTTSVendor(activeService.vId);
    // only an explicit `false` from validateSetup rejects; missing vendor/validator counts as valid
    if (vendor?.validateSetup?.(activeService.setup) !== false)
      return true;
  }

  // otherwise fall back to server-side ElevenLabs capability
  return getBackendCapabilities().hasVoiceElevenLabs;
}
+87
View File
@@ -0,0 +1,87 @@
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { getElevenLabsData } from '~/modules/elevenlabs/store-module-elevenlabs';
import { useModelsStore } from '~/common/stores/llms/store-llms';
import { findTTSVendor } from './vendors.registry';
import { useTTSStore } from './store-tts';
import type { TTSVendorId } from './tts.types';
/**
 * Migrates existing TTS configurations to the new TTS store.
 * This should be called once on app initialization.
 * Idempotent: returns immediately when any service already exists.
 */
export function migrateTTSServices() {
  // NOTE(review): `activeServiceId` is destructured but never used here
  const { services, activeServiceId } = useTTSStore.getState();

  // Skip if already migrated (has existing services)
  if (services.length > 0) {
    return;
  }

  // 1. Migrate from existing ElevenLabs configuration (client key OR server-side key)
  const { elevenLabsApiKey, elevenLabsVoiceId } = getElevenLabsData();
  const { hasVoiceElevenLabs } = getBackendCapabilities();

  if (elevenLabsApiKey || hasVoiceElevenLabs) {
    const elevenLabsVendor = findTTSVendor('elevenlabs');
    if (elevenLabsVendor) {
      const service = useTTSStore.getState().createService(elevenLabsVendor);

      // Set up with existing credentials (left empty when only the server key exists)
      if (elevenLabsApiKey) {
        useTTSStore.getState().updateServiceSettings(service.id, {
          elevenKey: elevenLabsApiKey,
        });
      }

      // Set as active service
      useTTSStore.getState().setActiveServiceId(service.id);

      // Set default voice if available
      if (elevenLabsVoiceId) {
        useTTSStore.getState().setActiveVoiceId(elevenLabsVoiceId);
      }

      console.log('TTS: Migrated ElevenLabs configuration to new TTS store');
    }
  }

  // 2. Auto-import from OpenAI LLM configuration
  autoImportTTSFromLLMs();
}
/**
 * Auto-imports TTS services from configured LLM services.
 * Creates an OpenAI TTS service when OpenAI LLM credentials exist and no
 * TTS service is already configured with that same key.
 */
export function autoImportTTSFromLLMs() {
  const { sources } = useModelsStore.getState();
  const { services } = useTTSStore.getState();

  // locate an OpenAI LLM source that carries an API key
  const openaiLLMService = sources.find(s => s.vId === 'openai');
  if (!openaiLLMService?.setup?.oaiKey)
    return;
  const llmSetup = openaiLLMService.setup;

  // bail if an OpenAI TTS service already uses this exact key
  if (services.some(s => s.vId === 'openai' && s.setup.oaiKey === llmSetup.oaiKey))
    return;

  const openaiTTSVendor = findTTSVendor('openai');
  if (!openaiTTSVendor)
    return;

  // create the service and copy the LLM credentials over
  const service = useTTSStore.getState().createService(openaiTTSVendor);
  useTTSStore.getState().updateServiceSettings(service.id, {
    oaiKey: llmSetup.oaiKey,
    oaiHost: llmSetup.oaiHost,
    oaiOrgId: llmSetup.oaiOrgId,
  });
  console.log('TTS: Auto-imported OpenAI TTS service from LLM configuration');
}
+65
View File
@@ -0,0 +1,65 @@
//
// TTS Core Types
//

// Unique id of a configured service instance (e.g. 'openai-1730000000000')
export type TTSServiceId = string;

// Known vendor implementations (must match vendors.registry keys)
export type TTSVendorId = 'elevenlabs' | 'openai';

/**
 * Audio formats supported by TTS services
 */
export type TTSAudioFormat = 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';

/**
 * Voice representation (unified across all vendors)
 */
export interface TTSVoice {
  id: string;
  name: string;
  description?: string;
  previewUrl?: string; // URL of an audio sample, when the vendor provides one
  language?: string;
  category?: string;
}

/**
 * Options for TTS generation (superset of all vendor capabilities)
 */
export interface TTSGenerationOptions {
  // Core parameters (all vendors)
  text: string;
  voiceId?: string;
  // Common optional parameters
  speed?: number; // 0.25-4.0 (OpenAI TTS)
  format?: TTSAudioFormat; // Output audio format
  streaming?: boolean; // Enable streaming
  // Advanced parameters (vendor-specific, optional)
  turbo?: boolean; // ElevenLabs: use turbo model
  nonEnglish?: boolean; // ElevenLabs: use multilingual model
}

/**
 * Result of TTS generation
 */
export interface TTSSpeakResult {
  success: boolean;
  audioBase64?: string; // Available when not streaming
  error?: string;
}

/**
 * TTS Service - configured instance of a TTS vendor
 */
export interface DTTSService<TServiceSettings extends object = {}> {
  id: TTSServiceId;
  label: string;
  // service -> vendor of that service
  vId: TTSVendorId;
  // service-specific settings (partial: filled in incrementally by the UI)
  setup: Partial<TServiceSettings>;
}
+25
View File
@@ -0,0 +1,25 @@
import { TTSVendorElevenLabs } from './vendors/elevenlabs/elevenlabs.vendor';
import { TTSVendorOpenAI } from './vendors/openai/openai-tts.vendor';
import type { ITTSVendor } from './ITTSVendor';
import type { TTSVendorId } from './tts.types';
/** Global: TTS Vendor Instances Registry **/

// Keyed by TTSVendorId; the previous `as Record<string, ITTSVendor>` assertion
// silenced the exhaustive-key check the annotation provides, so it was removed.
const TTS_VENDOR_REGISTRY: Record<TTSVendorId, ITTSVendor> = {
  elevenlabs: TTSVendorElevenLabs,
  openai: TTSVendorOpenAI,
};
/** All registered vendors, ordered by ascending displayRank. */
export function findAllTTSVendors(): ITTSVendor[] {
  return Object.values(TTS_VENDOR_REGISTRY)
    .sort((a, b) => a.displayRank - b.displayRank);
}
/** Resolve a vendor by id; null when the id is absent or unknown. */
export function findTTSVendor<TServiceSettings extends object = {}, TAccess = unknown>(
  vendorId?: TTSVendorId,
): ITTSVendor<TServiceSettings, TAccess> | null {
  if (!vendorId)
    return null;
  const vendor = TTS_VENDOR_REGISTRY[vendorId] as ITTSVendor<TServiceSettings, TAccess>;
  return vendor ?? null;
}
+82
View File
@@ -0,0 +1,82 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { apiStream } from '~/common/util/trpc.client';
import type { ITTSVendor } from '../../ITTSVendor';
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
// ElevenLabs Service Settings (persisted in the service's `setup`)
export interface ElevenLabsServiceSettings {
  elevenKey?: string; // user API key; empty = rely on server-side key
  elevenHost?: string; // custom API host; empty = default
}

// ElevenLabs Access (for RPC calls) — currently mirrors the settings shape
export interface ElevenLabsAccess {
  elevenKey?: string;
  elevenHost?: string;
}
/** ElevenLabs TTS vendor singleton — see ITTSVendor for the contract. */
export const TTSVendorElevenLabs: ITTSVendor<ElevenLabsServiceSettings, ElevenLabsAccess> = {
  id: 'elevenlabs',
  name: 'ElevenLabs',
  displayRank: 10,
  location: 'cloud',
  brandColor: undefined,
  // backend capability flag that signals a server-side ElevenLabs key
  hasServerConfigKey: 'hasVoiceElevenLabs',
  capabilities: {
    streaming: true,
    voiceCloning: true,
    speedControl: false,
    listVoices: true,
  },

  // Fresh settings for a new service; empty values defer to server defaults
  initializeSetup(): ElevenLabsServiceSettings {
    return {
      elevenKey: '',
      elevenHost: '',
    };
  },

  // Valid when no key is set (server key may exist) or the key is long enough
  // NOTE(review): 32 looks like a heuristic minimum key length — confirm against ElevenLabs key format
  validateSetup(setup: ElevenLabsServiceSettings): boolean {
    return !setup.elevenKey || setup.elevenKey.trim().length >= 32;
  },

  getTransportAccess(setup?: Partial<ElevenLabsServiceSettings>): ElevenLabsAccess {
    return {
      elevenKey: setup?.elevenKey,
      elevenHost: setup?.elevenHost,
    };
  },

  // Streams speech via the existing elevenlabs tRPC router; resolves to the piece iterable
  async rpcSpeak(access: ElevenLabsAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    return apiStream.elevenlabs.speech.mutate({
      xiKey: access.elevenKey,
      voiceId: options.voiceId,
      text: options.text,
      nonEnglish: options.nonEnglish ?? false,
      audioStreaming: options.streaming ?? false,
      audioTurbo: options.turbo ?? false,
    });
  },

  // NOTE(review): the `as any` cast suggests listVoices is not on the streaming
  // client's typed surface — consider calling the typed query client instead of casting
  async rpcListVoices(access: ElevenLabsAccess): Promise<{ voices: TTSVoice[] }> {
    const result = await (apiStream as any).elevenlabs.listVoices.query({
      elevenKey: access.elevenKey,
    });
    return {
      voices: result.voices.map((v: any) => ({
        id: v.id,
        name: v.name,
        description: v.description || undefined,
        previewUrl: v.previewUrl || undefined,
        category: v.category,
      })),
    };
  },
};
+86
View File
@@ -0,0 +1,86 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { apiStream } from '~/common/util/trpc.client';
import type { ITTSVendor } from '../../ITTSVendor';
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
// OpenAI TTS Service Settings (persisted in the service's `setup`)
export interface OpenAITTSServiceSettings {
  oaiKey?: string; // user API key; empty = rely on server-side key
  oaiHost?: string; // custom API host; empty = default api.openai.com
  oaiOrgId?: string; // optional OpenAI organization id
}

// OpenAI TTS Access (for RPC calls) — currently mirrors the settings shape
export interface OpenAITTSAccess {
  oaiKey?: string;
  oaiHost?: string;
  oaiOrgId?: string;
}

// OpenAI TTS voices (fixed list — the API exposes no voice-listing endpoint)
export const OPENAI_TTS_VOICES: TTSVoice[] = [
  { id: 'alloy', name: 'Alloy', description: 'Neutral and balanced' },
  { id: 'echo', name: 'Echo', description: 'Clear and articulate' },
  { id: 'fable', name: 'Fable', description: 'Expressive and warm' },
  { id: 'onyx', name: 'Onyx', description: 'Deep and authoritative' },
  { id: 'nova', name: 'Nova', description: 'Friendly and conversational' },
  { id: 'shimmer', name: 'Shimmer', description: 'Soft and gentle' },
];
/** OpenAI TTS vendor singleton — see ITTSVendor for the contract. */
export const TTSVendorOpenAI: ITTSVendor<OpenAITTSServiceSettings, OpenAITTSAccess> = {
  id: 'openai',
  name: 'OpenAI TTS',
  displayRank: 20,
  location: 'cloud',
  brandColor: '#10a37f',
  // piggybacks on the LLM capability flag: a server-side OpenAI key serves both
  hasServerConfigKey: 'hasLlmOpenAI',
  capabilities: {
    streaming: true,
    voiceCloning: false,
    speedControl: true,
    listVoices: true,
  },

  // Fresh settings for a new service; empty values defer to server defaults
  initializeSetup(): OpenAITTSServiceSettings {
    return {
      oaiKey: '',
      oaiHost: '',
      oaiOrgId: '',
    };
  },

  // Valid when no key is set (server key may exist) or the key has the 'sk-' prefix
  // NOTE(review): proxy/alternative-host keys may not start with 'sk-' — confirm this is intended
  validateSetup(setup: OpenAITTSServiceSettings): boolean {
    return !setup.oaiKey || setup.oaiKey.trim().startsWith('sk-');
  },

  getTransportAccess(setup?: Partial<OpenAITTSServiceSettings>): OpenAITTSAccess {
    return {
      oaiKey: setup?.oaiKey,
      oaiHost: setup?.oaiHost,
      oaiOrgId: setup?.oaiOrgId,
    };
  },

  // Streams speech via the tts.openai tRPC router
  // NOTE(review): model is hard-coded to 'tts-1' even though the router accepts
  // 'tts-1-hd'; `options.turbo` is ignored (no OpenAI equivalent)
  async rpcSpeak(access: OpenAITTSAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    return apiStream.tts.openai.speech.mutate({
      access,
      text: options.text,
      voice: options.voiceId || 'alloy',
      model: 'tts-1',
      speed: options.speed,
      format: options.format,
      streaming: options.streaming ?? false,
    });
  },

  async rpcListVoices(access: OpenAITTSAccess): Promise<{ voices: TTSVoice[] }> {
    // OpenAI has a fixed set of voices
    return { voices: OPENAI_TTS_VOICES };
  },
};
+4
View File
@@ -8,6 +8,7 @@ import { llmAnthropicRouter } from '~/modules/llms/server/anthropic/anthropic.ro
import { llmGeminiRouter } from '~/modules/llms/server/gemini/gemini.router';
import { llmOllamaRouter } from '~/modules/llms/server/ollama/ollama.router';
import { llmOpenAIRouter } from '~/modules/llms/server/openai/openai.router';
import { openaiTTSRouter } from '~/modules/tts/server/openai-tts.router';
import { youtubeRouter } from '~/modules/youtube/youtube.router';
/**
@@ -22,6 +23,9 @@ export const appRouterEdge = createTRPCRouter({
llmGemini: llmGeminiRouter,
llmOllama: llmOllamaRouter,
llmOpenAI: llmOpenAIRouter,
tts: createTRPCRouter({
openai: openaiTTSRouter,
}),
youtube: youtubeRouter,
});