diff --git a/src/modules/speex/speex.client.ts b/src/modules/speex/speex.client.ts
index fa6989e21..a0b2f29a2 100644
--- a/src/modules/speex/speex.client.ts
+++ b/src/modules/speex/speex.client.ts
@@ -8,12 +8,13 @@
 import type { DPersonaUid } from '~/common/stores/persona/persona.types';
 
-// legacy ElevenLabs backend (to be replaced with speex.router)
-import { elevenLabsSpeakText, useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
+// Legacy ElevenLabs capability check - fallback only, to be removed once fully ported
+import { useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
 
 import type { DSpeexEngineAny, DSpeexVoice, DVoiceWebSpeech, SpeexEngineId, SpeexVendorType } from './speex.types';
-import { speakWebSpeech } from './vendors/webspeech.client';
+import { listWebSpeechVoices, speakWebSpeech } from './vendors/webspeech.client';
 import { speexAreCredentialsValid, speexFindEngineById, speexFindGlobalEngine, speexFindValidEngineByType, useSpeexStore } from './store-module-speex';
+import { speexListVoicesRPC, speexSynthesizeRPC } from './speex.rpc-client';
 
 
 // Capability API
@@ -104,30 +105,23 @@ export async function speakText(
 
   // route based on engine
   try {
-    if (engine) {
-
-      switch (engine.vendorType) {
-        // Web Speech: client-only, no RPC
-        case 'webspeech':
-          return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
-
-        // ElevenLabs: legacy path (to be replaced with speex.router)
-        case 'elevenlabs':
-          return speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
-
-        // OpenAI/LocalAI: TODO - route through speex.router once wired
-        case 'openai':
-        case 'localai':
-          return {
-            success: false,
-            error: `Engine type '${engine.vendorType}' not yet implemented`,
-          };
-      }
+    switch (engine?.vendorType) {
+      // Web Speech: client-only, no RPC
+      case 'webspeech':
+        return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
+      // RPC providers: route through speex.router RPC
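+      // (speexSynthesizeRPC resolves per-engine credentials and streams audio
+      // particles back; see speex.rpc-client.ts)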
+      case 'elevenlabs':
+      case 'openai':
+      case 'localai':
+        return speexSynthesizeRPC(engine, inputText, { streaming, playback, returnAudio }, callbacks);
     }
 
-    // fallback to legacy ElevenLabs path
-    return await speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
+    // No engine found - return error
+    return {
+      success: false,
+      error: 'No TTS engine configured. Please configure a TTS engine in Settings.',
+    };
 
   } catch (error) {
     callbacks?.onError?.(error instanceof Error ? error : new Error(String(error)));
     return {
@@ -160,31 +154,37 @@ function _resolveEngineFromSelector(selector: SpeexVoiceSelector): DSpeexEngineA
 
-// Private: Speech dispatch functions
+// Voice Listing API
 
-export async function speakWithLegacyElevenLabs(
-  text: string,
-  voice: SpeexVoiceSelector,
-  options: { streaming: boolean; playback: boolean; returnAudio: boolean },
-  callbacks?: { onStart?: () => void; onChunk?: (chunk: ArrayBuffer) => void; onComplete?: () => void; onError?: (error: Error) => void },
-): Promise<SpeexSpeakResult> {
-
-  // extract voiceId from voice selector
-  let elevenVoiceId: string | undefined;
-  if (voice && 'voice' in voice && voice.voice && 'voiceId' in voice.voice)
-    elevenVoiceId = voice.voice.voiceId;
-
-  const result = await elevenLabsSpeakText(
-    text,
-    elevenVoiceId,
-    options.streaming && options.playback, // Only stream if also playing
-    true, // turbo mode
-  );
-
-  callbacks?.onComplete?.();
-
-  return {
-    success: result.success,
-    audioBase64: options.returnAudio ? result.audioBase64 : undefined,
-  };
+export interface SpeexVoiceInfo {
+  id: string;
+  name: string;
+  description?: string;
+  previewUrl?: string;
+  category?: string;
+}
+
+/**
+ * List available voices for an engine.
+ * For cloud providers, this calls the speex.router RPC.
+ * For webspeech, this uses the browser API.
+ */
+export async function speexListVoicesForEngine(engine: DSpeexEngineAny): Promise<SpeexVoiceInfo[]> {
+  switch (engine.vendorType) {
+    case 'webspeech': {
+      // Use browser API - synchronous but may need async loading
+      const browserVoices = listWebSpeechVoices();
+      return browserVoices.map(v => ({
+        id: v.voiceURI,
+        name: v.name,
+        description: `${v.lang}${v.localService ? ' (local)' : ''}`,
+      }));
+    }
+
+    case 'elevenlabs':
+    case 'openai':
+    case 'localai': {
+      // Use RPC
+      const result = await speexListVoicesRPC(engine);
+      return result.voices;
+    }
+  }
 }
diff --git a/src/modules/speex/speex.rpc-client.ts b/src/modules/speex/speex.rpc-client.ts
new file mode 100644
index 000000000..4062e75a1
--- /dev/null
+++ b/src/modules/speex/speex.rpc-client.ts
@@ -0,0 +1,293 @@
+/**
+ * Speex RPC Client
+ *
+ * Handles communication with speex.router for cloud TTS providers.
+ * Resolves credentials from engine configuration and calls the streaming API.
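+ *
+ * Illustrative usage sketch (the option values below are examples, not defaults):
+ * @example
+ *   const result = await speexSynthesizeRPC(engine, 'Hello!', { streaming: true, playback: true, returnAudio: false });
+ *   if (!result.success)
+ *     console.warn('TTS failed:', result.error);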
+ */
+
+import { apiAsync, apiStream } from '~/common/util/trpc.client';
+import { findModelsServiceOrNull } from '~/common/stores/llms/store-llms';
+
+import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
+
+import type { SpeexSpeakResult } from './speex.client';
+import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexCredentials, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
+import type { SpeexSpeechParticle, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
+
+
+// Configuration
+const AUDIO_CHUNK_BUFFER_MS = 100; // Small delay to allow audio buffering
+
+
+/**
+ * Synthesize speech via speex.router (streaming)
+ */
+export async function speexSynthesizeRPC(
+  engine: DSpeexEngineAny,
+  text: string,
+  options: { streaming: boolean; playback: boolean; returnAudio: boolean },
+  callbacks?: {
+    onStart?: () => void;
+    onChunk?: (chunk: ArrayBuffer) => void;
+    onComplete?: () => void;
+    onError?: (error: Error) => void;
+  },
+): Promise<SpeexSpeakResult> {
+
+  // Resolve wire access from engine credentials
+  const access = _resolveWireAccess(engine);
+  if (!access) {
+    const error = new Error(`Failed to resolve credentials for engine ${engine.engineId}`);
+    callbacks?.onError?.(error);
+    return { success: false, error: error.message };
+  }
+
+  // Build wire voice from engine voice
+  const voice = _buildWireVoice(engine);
+
+  // Create abort controller
+  const abortController = new AbortController();
+
+  // Audio player for streaming playback
+  let audioPlayer: AudioLivePlayer | null = null;
+  const audioChunks: ArrayBuffer[] = [];
+
+  try {
+    // Call the streaming RPC
+    const particleStream = await apiStream.speex.synthesize.mutate(
+      { access, text, voice, streaming: options.streaming },
+      { signal: abortController.signal },
+    );
+
+    // Process streaming particles
+    for await (const particle of particleStream) {
+      switch (particle.t) {
+        case 'start':
+          callbacks?.onStart?.();
+          if (options.playback && options.streaming) {
+            audioPlayer = new AudioLivePlayer();
+          }
+          break;
+
+        case 'audio': {
+          // Decode base64 to ArrayBuffer
+          const audioBuffer = _base64ToArrayBuffer(particle.base64);
+
+          // Playback
+          if (options.playback && audioPlayer) {
+            audioPlayer.enqueueChunk(audioBuffer);
+          }
+
+          // Accumulate for return
+          if (options.returnAudio) {
+            audioChunks.push(audioBuffer);
+          }
+
+          // Callback
+          callbacks?.onChunk?.(audioBuffer);
+          break;
+        }
+
+        case 'done':
+          if (audioPlayer) {
+            audioPlayer.endPlayback();
+          }
+          break;
+
+        case 'error':
+          throw new Error(particle.e);
+      }
+    }
+
+    callbacks?.onComplete?.();
+
+    // Build result
+    const result: SpeexSpeakResult = { success: true };
+
+    if (options.returnAudio && audioChunks.length > 0) {
+      // Concatenate all chunks and convert to base64
+      const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
+      const combined = new Uint8Array(totalLength);
+      let offset = 0;
+      for (const chunk of audioChunks) {
+        combined.set(new Uint8Array(chunk), offset);
+        offset += chunk.byteLength;
+      }
+      result.audioBase64 = _arrayBufferToBase64(combined.buffer);
+    }
+
+    return result;
+
+  } catch (error: any) {
+    // Cleanup
+    if (audioPlayer) {
+      void audioPlayer.stop();
+    }
+
+    const errorMessage = error.message || 'Synthesis failed';
+    callbacks?.onError?.(new Error(errorMessage));
+    return { success: false, error: errorMessage };
+  }
+}
+
+
+/**
+ * List voices via speex.router
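+ *
+ * Degrades gracefully: returns an empty voice list when credentials cannot be
+ * resolved or the RPC fails, so callers don't need their own try/catch.
+ * Minimal usage sketch:
+ * @example
+ *   const { voices } = await speexListVoicesRPC(engine);
+ *   console.log(voices.map(v => v.id));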
+ */
+export async function speexListVoicesRPC(engine: DSpeexEngineAny): Promise<SpeexWire_ListVoices_Output> {
+  const access = _resolveWireAccess(engine);
+  if (!access) {
+    return { voices: [] };
+  }
+
+  try {
+    return await apiAsync.speex.listVoices.query({ access });
+  } catch (error) {
+    console.error('Failed to list voices:', error);
+    return { voices: [] };
+  }
+}
+
+
+// Private: Credential Resolution
+
+function _resolveWireAccess(engine: DSpeexEngineAny): SpeexWire_Access | null {
+  const { vendorType, credentials } = engine;
+
+  // webspeech doesn't use RPC
+  if (vendorType === 'webspeech') return null;
+
+  const dialect = vendorType as SpeexRPCDialect;
+
+  switch (credentials.type) {
+    case 'api-key':
+      return _resolveFromApiKey(dialect, credentials);
+
+    case 'llms-service':
+      return _resolveFromLLMService(dialect, credentials);
+
+    default:
+      // 'none' credentials or unknown type
+      return null;
+  }
+}
+
+
+function _resolveFromApiKey(dialect: SpeexRPCDialect, credentials: DCredentialsApiKey): SpeexWire_Access | null {
+  switch (dialect) {
+    case 'elevenlabs':
+      if (!credentials.apiKey) return null;
+      return {
+        dialect: 'elevenlabs',
+        apiKey: credentials.apiKey,
+        apiHost: credentials.apiHost,
+      };
+
+    case 'openai':
+      if (!credentials.apiKey) return null;
+      return {
+        dialect: 'openai',
+        apiKey: credentials.apiKey,
+        apiHost: credentials.apiHost,
+      };
+
+    case 'localai':
+      if (!credentials.apiHost) return null;
+      return {
+        dialect: 'localai',
+        apiKey: credentials.apiKey,
+        apiHost: credentials.apiHost,
+      };
+  }
+}
+
+
+function _resolveFromLLMService(dialect: SpeexRPCDialect, credentials: DCredentialsLLMSService): SpeexWire_Access | null {
+  const service = findModelsServiceOrNull(credentials.serviceId);
+  if (!service) return null;
+
+  // Extract credentials based on LLM vendor type
+  const setup = service.setup as Record<string, any>;
+
+  switch (dialect) {
+    case 'elevenlabs':
+      // ElevenLabs doesn't typically link to LLM services
+      return null;
+
+    case 'openai':
+      // OpenAI LLM service uses oaiKey, oaiHost, oaiOrg
+      return {
+        dialect: 'openai',
+        apiKey: setup.oaiKey || '',
+        apiHost: setup.oaiHost || undefined,
+        orgId: setup.oaiOrg || undefined,
+      };
+
+    case 'localai': {
+      // LocalAI LLM service uses host
+      // LocalAI vendor uses 'localAIHost' field
+      const host = setup.localAIHost || setup.oaiHost || '';
+      if (!host) return null;
+      return {
+        dialect: 'localai',
+        apiHost: host,
+        apiKey: setup.localAIKey || setup.oaiKey || '',
+      };
+    }
+  }
+}
+
+
+// Private: Voice Building
+
+function _buildWireVoice(engine: DSpeexEngineAny): SpeexWire_Voice {
+  const { vendorType, voice } = engine;
+
+  switch (vendorType) {
+    case 'elevenlabs':
+      return {
+        dialect: 'elevenlabs',
+        voiceId: voice.voiceId,
+        model: voice.ttsModel,
+      };
+
+    case 'openai':
+      return {
+        dialect: 'openai',
+        voiceId: voice.voiceId,
+        model: voice.ttsModel,
+        speed: voice.speed,
+        instruction: voice.instruction,
+      };
+
+    case 'localai':
+      return {
+        dialect: 'localai',
+        voiceId: voice.voiceId,
+        model: voice.ttsModel,
+      };
+
+    case 'webspeech':
+      // webspeech doesn't use wire protocol
+      throw new Error('webspeech does not use RPC');
+  }
+}
+
+
+// Private: Helpers
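+
+// Audio crosses the RPC boundary as base64 strings inside JSON particles, so we
+// convert to/from ArrayBuffer by hand. atob/btoa are browser globals, which is
+// fine here since this module is client-only; the byte-by-byte loops also avoid
+// the argument-count limit that String.fromCharCode(...bytes) would hit on
+// large buffers.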
+
+function _base64ToArrayBuffer(base64: string): ArrayBuffer {
+  const binaryString = atob(base64);
+  const bytes = new Uint8Array(binaryString.length);
+  for (let i = 0; i < binaryString.length; i++) {
+    bytes[i] = binaryString.charCodeAt(i);
+  }
+  return bytes.buffer;
+}
+
+function _arrayBufferToBase64(buffer: ArrayBuffer): string {
+  const bytes = new Uint8Array(buffer);
+  let binary = '';
+  for (let i = 0; i < bytes.byteLength; i++) {
+    binary += String.fromCharCode(bytes[i]);
+  }
+  return btoa(binary);
+}