mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Speex: RPC backend
This commit is contained in:
@@ -8,12 +8,13 @@
|
||||
|
||||
import type { DPersonaUid } from '~/common/stores/persona/persona.types';
|
||||
|
||||
// legacy ElevenLabs backend (to be replaced with speex.router)
|
||||
import { elevenLabsSpeakText, useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
// Legacy ElevenLabs capability check - fallback only, to be removed once fully ported
|
||||
import { useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
|
||||
import type { DSpeexEngineAny, DSpeexVoice, DVoiceWebSpeech, SpeexEngineId, SpeexVendorType } from './speex.types';
|
||||
import { speakWebSpeech } from './vendors/webspeech.client';
|
||||
import { listWebSpeechVoices, speakWebSpeech } from './vendors/webspeech.client';
|
||||
import { speexAreCredentialsValid, speexFindEngineById, speexFindGlobalEngine, speexFindValidEngineByType, useSpeexStore } from './store-module-speex';
|
||||
import { speexListVoicesRPC, speexSynthesizeRPC } from './speex.rpc-client';
|
||||
|
||||
|
||||
// Capability API
|
||||
@@ -104,30 +105,23 @@ export async function speakText(
|
||||
// route based on engine
|
||||
try {
|
||||
|
||||
if (engine) {
|
||||
|
||||
switch (engine.vendorType) {
|
||||
// Web Speech: client-only, no RPC
|
||||
case 'webspeech':
|
||||
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
|
||||
|
||||
// ElevenLabs: legacy path (to be replaced with speex.router)
|
||||
case 'elevenlabs':
|
||||
return speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
|
||||
|
||||
// OpenAI/LocalAI: TODO - route through speex.router once wired
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
return {
|
||||
success: false,
|
||||
error: `Engine type '${engine.vendorType}' not yet implemented`,
|
||||
};
|
||||
}
|
||||
switch (engine?.vendorType) {
|
||||
// Web Speech: client-only, no RPC
|
||||
case 'webspeech':
|
||||
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
|
||||
|
||||
// RPC providers: route through speex.router RPC
|
||||
case 'elevenlabs':
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
return speexSynthesizeRPC(engine, inputText, { streaming, playback, returnAudio }, callbacks);
|
||||
}
|
||||
|
||||
// fallback to legacy ElevenLabs path
|
||||
return await speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
|
||||
// No engine found - return error
|
||||
return {
|
||||
success: false,
|
||||
error: 'No TTS engine configured. Please configure a TTS engine in Settings.',
|
||||
};
|
||||
} catch (error) {
|
||||
callbacks?.onError?.(error instanceof Error ? error : new Error(String(error)));
|
||||
return {
|
||||
@@ -160,31 +154,37 @@ function _resolveEngineFromSelector(selector: SpeexVoiceSelector): DSpeexEngineA
|
||||
}
|
||||
|
||||
|
||||
// Private: Speech dispatch functions
|
||||
// Voice Listing API
|
||||
|
||||
export async function speakWithLegacyElevenLabs(
|
||||
text: string,
|
||||
voice: SpeexVoiceSelector,
|
||||
options: { streaming: boolean; playback: boolean; returnAudio: boolean },
|
||||
callbacks?: { onStart?: () => void; onChunk?: (chunk: ArrayBuffer) => void; onComplete?: () => void; onError?: (error: Error) => void },
|
||||
): Promise<SpeexSpeakResult> {
|
||||
|
||||
// extract voiceId from voice selector
|
||||
let elevenVoiceId: string | undefined;
|
||||
if (voice && 'voice' in voice && voice.voice && 'voiceId' in voice.voice)
|
||||
elevenVoiceId = voice.voice.voiceId;
|
||||
|
||||
const result = await elevenLabsSpeakText(
|
||||
text,
|
||||
elevenVoiceId,
|
||||
options.streaming && options.playback, // Only stream if also playing
|
||||
true, // turbo mode
|
||||
);
|
||||
|
||||
callbacks?.onComplete?.();
|
||||
|
||||
return {
|
||||
success: result.success,
|
||||
audioBase64: options.returnAudio ? result.audioBase64 : undefined,
|
||||
};
|
||||
// Vendor-independent voice descriptor, as returned by speexListVoicesForEngine.
export interface SpeexVoiceInfo {
  id: string;           // vendor voice identifier (the voiceURI for webspeech)
  name: string;         // human-readable display name
  description?: string; // e.g. language tag, '(local)' marker for webspeech voices
  previewUrl?: string;  // sample audio URL — presumably vendor-provided; not set by webspeech
  category?: string;    // vendor voice grouping — not set by webspeech
}
|
||||
|
||||
/**
|
||||
* List available voices for an engine.
|
||||
* For cloud providers, this calls the speex.router RPC.
|
||||
* For webspeech, this uses the browser API.
|
||||
*/
|
||||
export async function speexListVoicesForEngine(engine: DSpeexEngineAny): Promise<SpeexVoiceInfo[]> {
|
||||
switch (engine.vendorType) {
|
||||
case 'webspeech':
|
||||
// Use browser API - synchronous but may need async loading
|
||||
const browserVoices = listWebSpeechVoices();
|
||||
return browserVoices.map(v => ({
|
||||
id: v.voiceURI,
|
||||
name: v.name,
|
||||
description: `${v.lang}${v.localService ? ' (local)' : ''}`,
|
||||
}));
|
||||
|
||||
case 'elevenlabs':
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
// Use RPC
|
||||
const result = await speexListVoicesRPC(engine);
|
||||
return result.voices;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,293 @@
|
||||
/**
|
||||
* Speex RPC Client
|
||||
*
|
||||
* Handles communication with speex.router for cloud TTS providers.
|
||||
* Resolves credentials from engine configuration and calls the streaming API.
|
||||
*/
|
||||
|
||||
import { apiAsync, apiStream } from '~/common/util/trpc.client';
|
||||
import { findModelsServiceOrNull } from '~/common/stores/llms/store-llms';
|
||||
|
||||
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
|
||||
|
||||
import type { SpeexSpeakResult } from './speex.client';
|
||||
import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexCredentials, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
|
||||
import type { SpeexSpeechParticle, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
|
||||
|
||||
|
||||
// Configuration
// Small delay to allow audio buffering.
// NOTE(review): not referenced anywhere in the visible code of this file — confirm intended use or remove.
const AUDIO_CHUNK_BUFFER_MS = 100; // Small delay to allow audio buffering
|
||||
|
||||
|
||||
/**
|
||||
* Synthesize speech via speex.router (streaming)
|
||||
*/
|
||||
export async function speexSynthesizeRPC(
|
||||
engine: DSpeexEngineAny,
|
||||
text: string,
|
||||
options: { streaming: boolean; playback: boolean; returnAudio: boolean },
|
||||
callbacks?: {
|
||||
onStart?: () => void;
|
||||
onChunk?: (chunk: ArrayBuffer) => void;
|
||||
onComplete?: () => void;
|
||||
onError?: (error: Error) => void;
|
||||
},
|
||||
): Promise<SpeexSpeakResult> {
|
||||
|
||||
// Resolve wire access from engine credentials
|
||||
const access = _resolveWireAccess(engine);
|
||||
if (!access) {
|
||||
const error = new Error(`Failed to resolve credentials for engine ${engine.engineId}`);
|
||||
callbacks?.onError?.(error);
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
|
||||
// Build wire voice from engine voice
|
||||
const voice = _buildWireVoice(engine);
|
||||
|
||||
// Create abort controller
|
||||
const abortController = new AbortController();
|
||||
|
||||
// Audio player for streaming playback
|
||||
let audioPlayer: AudioLivePlayer | null = null;
|
||||
const audioChunks: ArrayBuffer[] = [];
|
||||
|
||||
try {
|
||||
// Call the streaming RPC
|
||||
const particleStream = await apiStream.speex.synthesize.mutate(
|
||||
{ access, text, voice, streaming: options.streaming },
|
||||
{ signal: abortController.signal },
|
||||
);
|
||||
|
||||
// Process streaming particles
|
||||
for await (const particle of particleStream) {
|
||||
switch (particle.t) {
|
||||
case 'start':
|
||||
callbacks?.onStart?.();
|
||||
if (options.playback && options.streaming) {
|
||||
audioPlayer = new AudioLivePlayer();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'audio':
|
||||
// Decode base64 to ArrayBuffer
|
||||
const audioBuffer = _base64ToArrayBuffer(particle.base64);
|
||||
|
||||
// Playback
|
||||
if (options.playback && audioPlayer) {
|
||||
audioPlayer.enqueueChunk(audioBuffer);
|
||||
}
|
||||
|
||||
// Accumulate for return
|
||||
if (options.returnAudio) {
|
||||
audioChunks.push(audioBuffer);
|
||||
}
|
||||
|
||||
// Callback
|
||||
callbacks?.onChunk?.(audioBuffer);
|
||||
break;
|
||||
|
||||
case 'done':
|
||||
if (audioPlayer) {
|
||||
audioPlayer.endPlayback();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'error':
|
||||
throw new Error(particle.e);
|
||||
}
|
||||
}
|
||||
|
||||
callbacks?.onComplete?.();
|
||||
|
||||
// Build result
|
||||
const result: SpeexSpeakResult = { success: true };
|
||||
|
||||
if (options.returnAudio && audioChunks.length > 0) {
|
||||
// Concatenate all chunks and convert to base64
|
||||
const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
|
||||
const combined = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of audioChunks) {
|
||||
combined.set(new Uint8Array(chunk), offset);
|
||||
offset += chunk.byteLength;
|
||||
}
|
||||
result.audioBase64 = _arrayBufferToBase64(combined.buffer);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
} catch (error: any) {
|
||||
// Cleanup
|
||||
if (audioPlayer) {
|
||||
void audioPlayer.stop();
|
||||
}
|
||||
|
||||
const errorMessage = error.message || 'Synthesis failed';
|
||||
callbacks?.onError?.(new Error(errorMessage));
|
||||
return { success: false, error: errorMessage };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* List voices via speex.router
|
||||
*/
|
||||
export async function speexListVoicesRPC(engine: DSpeexEngineAny): Promise<SpeexWire_ListVoices_Output> {
|
||||
const access = _resolveWireAccess(engine);
|
||||
if (!access) {
|
||||
return { voices: [] };
|
||||
}
|
||||
|
||||
try {
|
||||
return await apiAsync.speex.listVoices.query({ access });
|
||||
} catch (error) {
|
||||
console.error('Failed to list voices:', error);
|
||||
return { voices: [] };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Credential Resolution
|
||||
|
||||
function _resolveWireAccess(engine: DSpeexEngineAny): SpeexWire_Access | null {
|
||||
const { vendorType, credentials } = engine;
|
||||
|
||||
// webspeech doesn't use RPC
|
||||
if (vendorType === 'webspeech') return null;
|
||||
|
||||
const dialect = vendorType as SpeexRPCDialect;
|
||||
|
||||
switch (credentials.type) {
|
||||
case 'api-key':
|
||||
return _resolveFromApiKey(dialect, credentials);
|
||||
|
||||
case 'llms-service':
|
||||
return _resolveFromLLMService(dialect, credentials);
|
||||
|
||||
default:
|
||||
// 'none' credentials or unknown type
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function _resolveFromApiKey(dialect: SpeexRPCDialect, credentials: DCredentialsApiKey): SpeexWire_Access | null {
|
||||
switch (dialect) {
|
||||
case 'elevenlabs':
|
||||
if (!credentials.apiKey) return null;
|
||||
return {
|
||||
dialect: 'elevenlabs',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
|
||||
case 'openai':
|
||||
if (!credentials.apiKey) return null;
|
||||
return {
|
||||
dialect: 'openai',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
if (!credentials.apiHost) return null;
|
||||
return {
|
||||
dialect: 'localai',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function _resolveFromLLMService(dialect: SpeexRPCDialect, credentials: DCredentialsLLMSService): SpeexWire_Access | null {
|
||||
const service = findModelsServiceOrNull(credentials.serviceId);
|
||||
if (!service) return null;
|
||||
|
||||
// Extract credentials based on LLM vendor type
|
||||
const setup = service.setup as Record<string, any>;
|
||||
|
||||
switch (dialect) {
|
||||
case 'elevenlabs':
|
||||
// ElevenLabs doesn't typically link to LLM services
|
||||
return null;
|
||||
|
||||
case 'openai':
|
||||
// OpenAI LLM service uses oaiKey, oaiHost, oaiOrg
|
||||
return {
|
||||
dialect: 'openai',
|
||||
apiKey: setup.oaiKey || '',
|
||||
apiHost: setup.oaiHost || undefined,
|
||||
orgId: setup.oaiOrg || undefined,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
// LocalAI LLM service uses host
|
||||
// LocalAI vendor uses 'localAIHost' field
|
||||
const host = setup.localAIHost || setup.oaiHost || '';
|
||||
if (!host) return null;
|
||||
return {
|
||||
dialect: 'localai',
|
||||
apiHost: host,
|
||||
apiKey: setup.localAIKey || setup.oaiKey || '',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Voice Building
|
||||
|
||||
function _buildWireVoice(engine: DSpeexEngineAny): SpeexWire_Voice {
|
||||
const { vendorType, voice } = engine;
|
||||
|
||||
switch (vendorType) {
|
||||
case 'elevenlabs':
|
||||
return {
|
||||
dialect: 'elevenlabs',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
};
|
||||
|
||||
case 'openai':
|
||||
return {
|
||||
dialect: 'openai',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
speed: voice.speed,
|
||||
instruction: voice.instruction,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
return {
|
||||
dialect: 'localai',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
};
|
||||
|
||||
case 'webspeech':
|
||||
// webspeech doesn't use wire protocol
|
||||
throw new Error('webspeech does not use RPC');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Helpers
|
||||
|
||||
function _base64ToArrayBuffer(base64: string): ArrayBuffer {
|
||||
const binaryString = atob(base64);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
return bytes.buffer;
|
||||
}
|
||||
|
||||
function _arrayBufferToBase64(buffer: ArrayBuffer): string {
|
||||
const bytes = new Uint8Array(buffer);
|
||||
let binary = '';
|
||||
for (let i = 0; i < bytes.byteLength; i++) {
|
||||
binary += String.fromCharCode(bytes[i]);
|
||||
}
|
||||
return btoa(binary);
|
||||
}
|
||||
Reference in New Issue
Block a user