Speex: RPC backend

This commit is contained in:
Enrico Ros
2025-11-25 22:20:30 -08:00
parent 5eba375f4d
commit 76187ba0e7
2 changed files with 343 additions and 50 deletions
+50 -50
View File
@@ -8,12 +8,13 @@
import type { DPersonaUid } from '~/common/stores/persona/persona.types';
// legacy ElevenLabs backend (to be replaced with speex.router)
import { elevenLabsSpeakText, useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
// Legacy ElevenLabs capability check - fallback only, to be removed once fully ported
import { useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
import type { DSpeexEngineAny, DSpeexVoice, DVoiceWebSpeech, SpeexEngineId, SpeexVendorType } from './speex.types';
import { speakWebSpeech } from './vendors/webspeech.client';
import { listWebSpeechVoices, speakWebSpeech } from './vendors/webspeech.client';
import { speexAreCredentialsValid, speexFindEngineById, speexFindGlobalEngine, speexFindValidEngineByType, useSpeexStore } from './store-module-speex';
import { speexListVoicesRPC, speexSynthesizeRPC } from './speex.rpc-client';
// Capability API
@@ -104,30 +105,23 @@ export async function speakText(
// route based on engine
try {
if (engine) {
switch (engine.vendorType) {
// Web Speech: client-only, no RPC
case 'webspeech':
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
// ElevenLabs: legacy path (to be replaced with speex.router)
case 'elevenlabs':
return speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
// OpenAI/LocalAI: TODO - route through speex.router once wired
case 'openai':
case 'localai':
return {
success: false,
error: `Engine type '${engine.vendorType}' not yet implemented`,
};
}
switch (engine?.vendorType) {
// Web Speech: client-only, no RPC
case 'webspeech':
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
// RPC providers: route through speex.router RPC
case 'elevenlabs':
case 'openai':
case 'localai':
return speexSynthesizeRPC(engine, inputText, { streaming, playback, returnAudio }, callbacks);
}
// fallback to legacy ElevenLabs path
return await speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
// No engine found - return error
return {
success: false,
error: 'No TTS engine configured. Please configure a TTS engine in Settings.',
};
} catch (error) {
callbacks?.onError?.(error instanceof Error ? error : new Error(String(error)));
return {
@@ -160,31 +154,37 @@ function _resolveEngineFromSelector(selector: SpeexVoiceSelector): DSpeexEngineA
}
// Private: Speech dispatch functions
// Voice Listing API
export async function speakWithLegacyElevenLabs(
text: string,
voice: SpeexVoiceSelector,
options: { streaming: boolean; playback: boolean; returnAudio: boolean },
callbacks?: { onStart?: () => void; onChunk?: (chunk: ArrayBuffer) => void; onComplete?: () => void; onError?: (error: Error) => void },
): Promise<SpeexSpeakResult> {
// extract voiceId from voice selector
let elevenVoiceId: string | undefined;
if (voice && 'voice' in voice && voice.voice && 'voiceId' in voice.voice)
elevenVoiceId = voice.voice.voiceId;
const result = await elevenLabsSpeakText(
text,
elevenVoiceId,
options.streaming && options.playback, // Only stream if also playing
true, // turbo mode
);
callbacks?.onComplete?.();
return {
success: result.success,
audioBase64: options.returnAudio ? result.audioBase64 : undefined,
};
/**
 * Vendor-neutral voice descriptor returned by the voice listing API.
 */
export interface SpeexVoiceInfo {
  // stable voice identifier (the browser voiceURI for webspeech; presumably the provider voice id for RPC vendors — confirm)
  id: string;
  // human-readable voice name
  name: string;
  // optional extra info; the webspeech path fills this with language + locality
  description?: string;
  // optional audio preview URL — not set by the webspeech path; presumably populated by RPC voices, verify
  previewUrl?: string;
  // optional vendor-specific grouping label — not set by the webspeech path
  category?: string;
}
/**
 * List available voices for an engine.
 * For webspeech this reads the browser's voice list synchronously; for cloud
 * providers (elevenlabs/openai/localai) it queries the speex.router RPC.
 *
 * @param engine - the engine whose vendor determines the lookup path
 * @returns normalized voice descriptors; the RPC path degrades to [] on failure
 */
export async function speexListVoicesForEngine(engine: DSpeexEngineAny): Promise<SpeexVoiceInfo[]> {
  switch (engine.vendorType) {

    case 'webspeech': {
      // braces scope the const to this case (no-case-declarations)
      // Use browser API - synchronous but may need async loading
      const browserVoices = listWebSpeechVoices();
      return browserVoices.map(v => ({
        id: v.voiceURI,
        name: v.name,
        description: `${v.lang}${v.localService ? ' (local)' : ''}`,
      }));
    }

    case 'elevenlabs':
    case 'openai':
    case 'localai': {
      // cloud vendors: ask the server via RPC
      const result = await speexListVoicesRPC(engine);
      return result.voices;
    }
  }
}
+293
View File
@@ -0,0 +1,293 @@
/**
* Speex RPC Client
*
* Handles communication with speex.router for cloud TTS providers.
* Resolves credentials from engine configuration and calls the streaming API.
*/
import { apiAsync, apiStream } from '~/common/util/trpc.client';
import { findModelsServiceOrNull } from '~/common/stores/llms/store-llms';
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
import type { SpeexSpeakResult } from './speex.client';
import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexCredentials, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
import type { SpeexSpeechParticle, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
// Configuration
// NOTE(review): this constant is not referenced anywhere in this file — confirm it is meant to be wired into playback buffering, or remove it
const AUDIO_CHUNK_BUFFER_MS = 100; // Small delay to allow audio buffering
/**
 * Synthesize speech via speex.router (streaming).
 *
 * Resolves wire credentials from the engine configuration, then consumes the
 * particle stream from the server:
 *  - 'start': notifies the caller and (when streaming + playback) opens a live player
 *  - 'audio': base64-decodes the chunk, plays/accumulates it, invokes onChunk
 *  - 'done' : flushes the live player
 *  - 'error': aborts with the server-provided message
 *
 * @param engine - engine whose credentials and voice configure the request
 * @param text - text to synthesize
 * @param options.streaming - request streamed synthesis from the server
 * @param options.playback - play audio locally as chunks arrive (streaming only)
 * @param options.returnAudio - accumulate chunks and return them as base64
 * @param callbacks - optional lifecycle callbacks; onError is invoked on any failure
 * @returns success flag, optional error message, optional base64 audio
 */
export async function speexSynthesizeRPC(
  engine: DSpeexEngineAny,
  text: string,
  options: { streaming: boolean; playback: boolean; returnAudio: boolean },
  callbacks?: {
    onStart?: () => void;
    onChunk?: (chunk: ArrayBuffer) => void;
    onComplete?: () => void;
    onError?: (error: Error) => void;
  },
): Promise<SpeexSpeakResult> {

  // Resolve wire access from engine credentials
  const access = _resolveWireAccess(engine);
  if (!access) {
    const error = new Error(`Failed to resolve credentials for engine ${engine.engineId}`);
    callbacks?.onError?.(error);
    return { success: false, error: error.message };
  }

  // Build wire voice from engine voice
  const voice = _buildWireVoice(engine);

  // abort support for the streaming call (no caller currently triggers it)
  const abortController = new AbortController();

  // Audio player for streaming playback
  let audioPlayer: AudioLivePlayer | null = null;
  const audioChunks: ArrayBuffer[] = [];

  try {
    // Call the streaming RPC
    const particleStream = await apiStream.speex.synthesize.mutate(
      { access, text, voice, streaming: options.streaming },
      { signal: abortController.signal },
    );

    // Process streaming particles
    for await (const particle of particleStream) {
      switch (particle.t) {

        case 'start':
          callbacks?.onStart?.();
          // live playback only makes sense when the server is streaming
          if (options.playback && options.streaming)
            audioPlayer = new AudioLivePlayer();
          break;

        case 'audio': {
          // braces scope the const to this case (no-case-declarations)
          const audioBuffer = _base64ToArrayBuffer(particle.base64);
          // Playback
          if (options.playback && audioPlayer)
            audioPlayer.enqueueChunk(audioBuffer);
          // Accumulate for return
          if (options.returnAudio)
            audioChunks.push(audioBuffer);
          // Callback
          callbacks?.onChunk?.(audioBuffer);
          break;
        }

        case 'done':
          if (audioPlayer)
            audioPlayer.endPlayback();
          break;

        case 'error':
          throw new Error(particle.e);
      }
    }

    callbacks?.onComplete?.();

    // Build result
    const result: SpeexSpeakResult = { success: true };
    if (options.returnAudio && audioChunks.length > 0) {
      // Concatenate all chunks and convert to base64
      const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
      const combined = new Uint8Array(totalLength);
      let offset = 0;
      for (const chunk of audioChunks) {
        combined.set(new Uint8Array(chunk), offset);
        offset += chunk.byteLength;
      }
      result.audioBase64 = _arrayBufferToBase64(combined.buffer);
    }
    return result;

  } catch (error: unknown) {
    // stop the live player (fire-and-forget), then surface the error
    if (audioPlayer)
      void audioPlayer.stop();
    const errorMessage = (error instanceof Error ? error.message : String(error)) || 'Synthesis failed';
    callbacks?.onError?.(new Error(errorMessage));
    return { success: false, error: errorMessage };
  }
}
/**
 * List voices via speex.router.
 * Degrades gracefully: missing credentials or a failed RPC both yield an
 * empty voice list instead of throwing.
 */
export async function speexListVoicesRPC(engine: DSpeexEngineAny): Promise<SpeexWire_ListVoices_Output> {
  const access = _resolveWireAccess(engine);
  if (access) {
    try {
      return await apiAsync.speex.listVoices.query({ access });
    } catch (error) {
      // best-effort: log and fall through to the empty list
      console.error('Failed to list voices:', error);
    }
  }
  return { voices: [] };
}
// Private: Credential Resolution
/**
 * Map an engine's stored credentials to wire access for the RPC layer.
 * Returns null for webspeech (client-only) and for 'none'/unknown credentials.
 */
function _resolveWireAccess(engine: DSpeexEngineAny): SpeexWire_Access | null {

  // webspeech is handled entirely client-side, never over RPC
  if (engine.vendorType === 'webspeech')
    return null;

  const rpcDialect = engine.vendorType as SpeexRPCDialect;
  const engineCredentials = engine.credentials;

  if (engineCredentials.type === 'api-key')
    return _resolveFromApiKey(rpcDialect, engineCredentials);
  if (engineCredentials.type === 'llms-service')
    return _resolveFromLLMService(rpcDialect, engineCredentials);

  // 'none' credentials or unknown type
  return null;
}
/**
 * Build wire access from direct API-key credentials.
 * ElevenLabs and OpenAI require an apiKey; LocalAI requires an apiHost.
 */
function _resolveFromApiKey(dialect: SpeexRPCDialect, credentials: DCredentialsApiKey): SpeexWire_Access | null {
  const { apiKey, apiHost } = credentials;
  switch (dialect) {
    case 'elevenlabs':
      // key is mandatory, host optional
      return apiKey ? { dialect: 'elevenlabs', apiKey, apiHost } : null;
    case 'openai':
      // key is mandatory, host optional
      return apiKey ? { dialect: 'openai', apiKey, apiHost } : null;
    case 'localai':
      // host is mandatory, key optional
      return apiHost ? { dialect: 'localai', apiKey, apiHost } : null;
  }
}
/**
 * Build wire access by borrowing credentials from a linked LLM service.
 * Reads vendor-specific fields out of the service's `setup` object, so the
 * field names here must track the corresponding LLM vendor implementations.
 *
 * @returns wire access, or null when the service is missing or unusable
 */
function _resolveFromLLMService(dialect: SpeexRPCDialect, credentials: DCredentialsLLMSService): SpeexWire_Access | null {
  const service = findModelsServiceOrNull(credentials.serviceId);
  if (!service) return null;

  // Extract credentials based on LLM vendor type
  const setup = service.setup as Record<string, any>;

  switch (dialect) {

    case 'elevenlabs':
      // ElevenLabs doesn't typically link to LLM services
      return null;

    case 'openai':
      // OpenAI LLM service uses oaiKey, oaiHost, oaiOrg
      return {
        dialect: 'openai',
        apiKey: setup.oaiKey || '',
        apiHost: setup.oaiHost || undefined,
        orgId: setup.oaiOrg || undefined,
      };

    case 'localai': {
      // braces scope the const to this case (no-case-declarations)
      // LocalAI vendor uses 'localAIHost'/'localAIKey'; fall back to the
      // OpenAI-style fields for services configured that way
      const host = setup.localAIHost || setup.oaiHost || '';
      if (!host) return null;
      return {
        dialect: 'localai',
        apiHost: host,
        apiKey: setup.localAIKey || setup.oaiKey || '',
      };
    }
  }
}
// Private: Voice Building
/**
 * Translate the engine's stored voice configuration into the wire voice
 * payload expected by speex.router. Throws for webspeech, which never goes
 * over the wire.
 */
function _buildWireVoice(engine: DSpeexEngineAny): SpeexWire_Voice {
  const engineVoice = engine.voice;
  switch (engine.vendorType) {
    case 'elevenlabs':
      return { dialect: 'elevenlabs', voiceId: engineVoice.voiceId, model: engineVoice.ttsModel };
    case 'openai':
      // OpenAI additionally carries a speed and a free-form instruction
      return {
        dialect: 'openai',
        voiceId: engineVoice.voiceId,
        model: engineVoice.ttsModel,
        speed: engineVoice.speed,
        instruction: engineVoice.instruction,
      };
    case 'localai':
      return { dialect: 'localai', voiceId: engineVoice.voiceId, model: engineVoice.ttsModel };
    case 'webspeech':
      // webspeech is synthesized client-side and has no wire representation
      throw new Error('webspeech does not use RPC');
  }
}
// Private: Helpers
/**
 * Decode a base64 string into a fresh ArrayBuffer.
 * atob yields Latin-1 characters only (all code points < 256), so iterating
 * the string by code point is equivalent to indexing by char code.
 */
function _base64ToArrayBuffer(base64: string): ArrayBuffer {
  const decoded = atob(base64);
  const byteArray = Uint8Array.from(decoded, (char) => char.charCodeAt(0));
  return byteArray.buffer;
}
/**
 * Encode an ArrayBuffer as a base64 string.
 * Collects one character per byte, then encodes the joined binary string.
 */
function _arrayBufferToBase64(buffer: ArrayBuffer): string {
  const byteView = new Uint8Array(buffer);
  const charPieces: string[] = [];
  for (const byte of byteView)
    charPieces.push(String.fromCharCode(byte));
  return btoa(charPieces.join(''));
}