mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Speex: RPC backend
This commit is contained in:
@@ -8,12 +8,13 @@
|
||||
|
||||
import type { DPersonaUid } from '~/common/stores/persona/persona.types';
|
||||
|
||||
// legacy ElevenLabs backend (to be replaced with speex.router)
|
||||
import { elevenLabsSpeakText, useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
// Legacy ElevenLabs capability check - fallback only, to be removed once fully ported
|
||||
import { useCapabilityElevenlabs } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
|
||||
import type { DSpeexEngineAny, DSpeexVoice, DVoiceWebSpeech, SpeexEngineId, SpeexVendorType } from './speex.types';
|
||||
import { speakWebSpeech } from './vendors/webspeech.client';
|
||||
import { listWebSpeechVoices, speakWebSpeech } from './vendors/webspeech.client';
|
||||
import { speexAreCredentialsValid, speexFindEngineById, speexFindGlobalEngine, speexFindValidEngineByType, useSpeexStore } from './store-module-speex';
|
||||
import { speexListVoicesRPC, speexSynthesizeRPC } from './speex.rpc-client';
|
||||
|
||||
|
||||
// Capability API
|
||||
@@ -104,30 +105,23 @@ export async function speakText(
|
||||
// route based on engine
|
||||
try {
|
||||
|
||||
if (engine) {
|
||||
|
||||
switch (engine.vendorType) {
|
||||
// Web Speech: client-only, no RPC
|
||||
case 'webspeech':
|
||||
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
|
||||
|
||||
// ElevenLabs: legacy path (to be replaced with speex.router)
|
||||
case 'elevenlabs':
|
||||
return speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
|
||||
|
||||
// OpenAI/LocalAI: TODO - route through speex.router once wired
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
return {
|
||||
success: false,
|
||||
error: `Engine type '${engine.vendorType}' not yet implemented`,
|
||||
};
|
||||
}
|
||||
switch (engine?.vendorType) {
|
||||
// Web Speech: client-only, no RPC
|
||||
case 'webspeech':
|
||||
return speakWebSpeech(inputText, engine.voice as DVoiceWebSpeech, callbacks);
|
||||
|
||||
// RPC providers: route through speex.router RPC
|
||||
case 'elevenlabs':
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
return speexSynthesizeRPC(engine, inputText, { streaming, playback, returnAudio }, callbacks);
|
||||
}
|
||||
|
||||
// fallback to legacy ElevenLabs path
|
||||
return await speakWithLegacyElevenLabs(inputText, voice, { streaming, playback, returnAudio }, callbacks);
|
||||
// No engine found - return error
|
||||
return {
|
||||
success: false,
|
||||
error: 'No TTS engine configured. Please configure a TTS engine in Settings.',
|
||||
};
|
||||
} catch (error) {
|
||||
callbacks?.onError?.(error instanceof Error ? error : new Error(String(error)));
|
||||
return {
|
||||
@@ -160,31 +154,37 @@ function _resolveEngineFromSelector(selector: SpeexVoiceSelector): DSpeexEngineA
|
||||
}
|
||||
|
||||
|
||||
// Private: Speech dispatch functions
|
||||
// Voice Listing API
|
||||
|
||||
export async function speakWithLegacyElevenLabs(
|
||||
text: string,
|
||||
voice: SpeexVoiceSelector,
|
||||
options: { streaming: boolean; playback: boolean; returnAudio: boolean },
|
||||
callbacks?: { onStart?: () => void; onChunk?: (chunk: ArrayBuffer) => void; onComplete?: () => void; onError?: (error: Error) => void },
|
||||
): Promise<SpeexSpeakResult> {
|
||||
|
||||
// extract voiceId from voice selector
|
||||
let elevenVoiceId: string | undefined;
|
||||
if (voice && 'voice' in voice && voice.voice && 'voiceId' in voice.voice)
|
||||
elevenVoiceId = voice.voice.voiceId;
|
||||
|
||||
const result = await elevenLabsSpeakText(
|
||||
text,
|
||||
elevenVoiceId,
|
||||
options.streaming && options.playback, // Only stream if also playing
|
||||
true, // turbo mode
|
||||
);
|
||||
|
||||
callbacks?.onComplete?.();
|
||||
|
||||
return {
|
||||
success: result.success,
|
||||
audioBase64: options.returnAudio ? result.audioBase64 : undefined,
|
||||
};
|
||||
// Vendor-independent voice descriptor, as returned by speexListVoicesForEngine.
export interface SpeexVoiceInfo {
  id: string;           // vendor voice identifier (the voiceURI for webspeech)
  name: string;         // human-readable display name
  description?: string; // e.g. language tag, '(local)' marker for webspeech voices
  previewUrl?: string;  // sample audio URL — presumably vendor-provided; not set by webspeech
  category?: string;    // vendor voice grouping — not set by webspeech
}
|
||||
|
||||
/**
|
||||
* List available voices for an engine.
|
||||
* For cloud providers, this calls the speex.router RPC.
|
||||
* For webspeech, this uses the browser API.
|
||||
*/
|
||||
export async function speexListVoicesForEngine(engine: DSpeexEngineAny): Promise<SpeexVoiceInfo[]> {
|
||||
switch (engine.vendorType) {
|
||||
case 'webspeech':
|
||||
// Use browser API - synchronous but may need async loading
|
||||
const browserVoices = listWebSpeechVoices();
|
||||
return browserVoices.map(v => ({
|
||||
id: v.voiceURI,
|
||||
name: v.name,
|
||||
description: `${v.lang}${v.localService ? ' (local)' : ''}`,
|
||||
}));
|
||||
|
||||
case 'elevenlabs':
|
||||
case 'openai':
|
||||
case 'localai':
|
||||
// Use RPC
|
||||
const result = await speexListVoicesRPC(engine);
|
||||
return result.voices;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,293 @@
|
||||
/**
|
||||
* Speex RPC Client
|
||||
*
|
||||
* Handles communication with speex.router for cloud TTS providers.
|
||||
* Resolves credentials from engine configuration and calls the streaming API.
|
||||
*/
|
||||
|
||||
import { apiAsync, apiStream } from '~/common/util/trpc.client';
|
||||
import { findModelsServiceOrNull } from '~/common/stores/llms/store-llms';
|
||||
|
||||
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
|
||||
|
||||
import type { SpeexSpeakResult } from './speex.client';
|
||||
import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexCredentials, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
|
||||
import type { SpeexSpeechParticle, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
|
||||
|
||||
|
||||
// Configuration
// Small delay to allow audio buffering.
// NOTE(review): not referenced anywhere in the visible code of this file — confirm intended use or remove.
const AUDIO_CHUNK_BUFFER_MS = 100; // Small delay to allow audio buffering
|
||||
|
||||
|
||||
/**
|
||||
* Synthesize speech via speex.router (streaming)
|
||||
*/
|
||||
export async function speexSynthesizeRPC(
|
||||
engine: DSpeexEngineAny,
|
||||
text: string,
|
||||
options: { streaming: boolean; playback: boolean; returnAudio: boolean },
|
||||
callbacks?: {
|
||||
onStart?: () => void;
|
||||
onChunk?: (chunk: ArrayBuffer) => void;
|
||||
onComplete?: () => void;
|
||||
onError?: (error: Error) => void;
|
||||
},
|
||||
): Promise<SpeexSpeakResult> {
|
||||
|
||||
// Resolve wire access from engine credentials
|
||||
const access = _resolveWireAccess(engine);
|
||||
if (!access) {
|
||||
const error = new Error(`Failed to resolve credentials for engine ${engine.engineId}`);
|
||||
callbacks?.onError?.(error);
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
|
||||
// Build wire voice from engine voice
|
||||
const voice = _buildWireVoice(engine);
|
||||
|
||||
// Create abort controller
|
||||
const abortController = new AbortController();
|
||||
|
||||
// Audio player for streaming playback
|
||||
let audioPlayer: AudioLivePlayer | null = null;
|
||||
const audioChunks: ArrayBuffer[] = [];
|
||||
|
||||
try {
|
||||
// Call the streaming RPC
|
||||
const particleStream = await apiStream.speex.synthesize.mutate(
|
||||
{ access, text, voice, streaming: options.streaming },
|
||||
{ signal: abortController.signal },
|
||||
);
|
||||
|
||||
// Process streaming particles
|
||||
for await (const particle of particleStream) {
|
||||
switch (particle.t) {
|
||||
case 'start':
|
||||
callbacks?.onStart?.();
|
||||
if (options.playback && options.streaming) {
|
||||
audioPlayer = new AudioLivePlayer();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'audio':
|
||||
// Decode base64 to ArrayBuffer
|
||||
const audioBuffer = _base64ToArrayBuffer(particle.base64);
|
||||
|
||||
// Playback
|
||||
if (options.playback && audioPlayer) {
|
||||
audioPlayer.enqueueChunk(audioBuffer);
|
||||
}
|
||||
|
||||
// Accumulate for return
|
||||
if (options.returnAudio) {
|
||||
audioChunks.push(audioBuffer);
|
||||
}
|
||||
|
||||
// Callback
|
||||
callbacks?.onChunk?.(audioBuffer);
|
||||
break;
|
||||
|
||||
case 'done':
|
||||
if (audioPlayer) {
|
||||
audioPlayer.endPlayback();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'error':
|
||||
throw new Error(particle.e);
|
||||
}
|
||||
}
|
||||
|
||||
callbacks?.onComplete?.();
|
||||
|
||||
// Build result
|
||||
const result: SpeexSpeakResult = { success: true };
|
||||
|
||||
if (options.returnAudio && audioChunks.length > 0) {
|
||||
// Concatenate all chunks and convert to base64
|
||||
const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
|
||||
const combined = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of audioChunks) {
|
||||
combined.set(new Uint8Array(chunk), offset);
|
||||
offset += chunk.byteLength;
|
||||
}
|
||||
result.audioBase64 = _arrayBufferToBase64(combined.buffer);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
} catch (error: any) {
|
||||
// Cleanup
|
||||
if (audioPlayer) {
|
||||
void audioPlayer.stop();
|
||||
}
|
||||
|
||||
const errorMessage = error.message || 'Synthesis failed';
|
||||
callbacks?.onError?.(new Error(errorMessage));
|
||||
return { success: false, error: errorMessage };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* List voices via speex.router
|
||||
*/
|
||||
export async function speexListVoicesRPC(engine: DSpeexEngineAny): Promise<SpeexWire_ListVoices_Output> {
|
||||
const access = _resolveWireAccess(engine);
|
||||
if (!access) {
|
||||
return { voices: [] };
|
||||
}
|
||||
|
||||
try {
|
||||
return await apiAsync.speex.listVoices.query({ access });
|
||||
} catch (error) {
|
||||
console.error('Failed to list voices:', error);
|
||||
return { voices: [] };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Credential Resolution
|
||||
|
||||
function _resolveWireAccess(engine: DSpeexEngineAny): SpeexWire_Access | null {
|
||||
const { vendorType, credentials } = engine;
|
||||
|
||||
// webspeech doesn't use RPC
|
||||
if (vendorType === 'webspeech') return null;
|
||||
|
||||
const dialect = vendorType as SpeexRPCDialect;
|
||||
|
||||
switch (credentials.type) {
|
||||
case 'api-key':
|
||||
return _resolveFromApiKey(dialect, credentials);
|
||||
|
||||
case 'llms-service':
|
||||
return _resolveFromLLMService(dialect, credentials);
|
||||
|
||||
default:
|
||||
// 'none' credentials or unknown type
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function _resolveFromApiKey(dialect: SpeexRPCDialect, credentials: DCredentialsApiKey): SpeexWire_Access | null {
|
||||
switch (dialect) {
|
||||
case 'elevenlabs':
|
||||
if (!credentials.apiKey) return null;
|
||||
return {
|
||||
dialect: 'elevenlabs',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
|
||||
case 'openai':
|
||||
if (!credentials.apiKey) return null;
|
||||
return {
|
||||
dialect: 'openai',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
if (!credentials.apiHost) return null;
|
||||
return {
|
||||
dialect: 'localai',
|
||||
apiKey: credentials.apiKey,
|
||||
apiHost: credentials.apiHost,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function _resolveFromLLMService(dialect: SpeexRPCDialect, credentials: DCredentialsLLMSService): SpeexWire_Access | null {
|
||||
const service = findModelsServiceOrNull(credentials.serviceId);
|
||||
if (!service) return null;
|
||||
|
||||
// Extract credentials based on LLM vendor type
|
||||
const setup = service.setup as Record<string, any>;
|
||||
|
||||
switch (dialect) {
|
||||
case 'elevenlabs':
|
||||
// ElevenLabs doesn't typically link to LLM services
|
||||
return null;
|
||||
|
||||
case 'openai':
|
||||
// OpenAI LLM service uses oaiKey, oaiHost, oaiOrg
|
||||
return {
|
||||
dialect: 'openai',
|
||||
apiKey: setup.oaiKey || '',
|
||||
apiHost: setup.oaiHost || undefined,
|
||||
orgId: setup.oaiOrg || undefined,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
// LocalAI LLM service uses host
|
||||
// LocalAI vendor uses 'localAIHost' field
|
||||
const host = setup.localAIHost || setup.oaiHost || '';
|
||||
if (!host) return null;
|
||||
return {
|
||||
dialect: 'localai',
|
||||
apiHost: host,
|
||||
apiKey: setup.localAIKey || setup.oaiKey || '',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Voice Building
|
||||
|
||||
function _buildWireVoice(engine: DSpeexEngineAny): SpeexWire_Voice {
|
||||
const { vendorType, voice } = engine;
|
||||
|
||||
switch (vendorType) {
|
||||
case 'elevenlabs':
|
||||
return {
|
||||
dialect: 'elevenlabs',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
};
|
||||
|
||||
case 'openai':
|
||||
return {
|
||||
dialect: 'openai',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
speed: voice.speed,
|
||||
instruction: voice.instruction,
|
||||
};
|
||||
|
||||
case 'localai':
|
||||
return {
|
||||
dialect: 'localai',
|
||||
voiceId: voice.voiceId,
|
||||
model: voice.ttsModel,
|
||||
};
|
||||
|
||||
case 'webspeech':
|
||||
// webspeech doesn't use wire protocol
|
||||
throw new Error('webspeech does not use RPC');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Private: Helpers
|
||||
|
||||
function _base64ToArrayBuffer(base64: string): ArrayBuffer {
|
||||
const binaryString = atob(base64);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
return bytes.buffer;
|
||||
}
|
||||
|
||||
function _arrayBufferToBase64(buffer: ArrayBuffer): string {
|
||||
const bytes = new Uint8Array(buffer);
|
||||
let binary = '';
|
||||
for (let i = 0; i < bytes.byteLength; i++) {
|
||||
binary += String.fromCharCode(bytes[i]);
|
||||
}
|
||||
return btoa(binary);
|
||||
}
|
||||
Reference in New Issue
Block a user