mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 04a83247ee |
@@ -1,4 +1,4 @@
|
||||
import { elevenLabsSpeakText } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
import { speakText } from '~/modules/tts/tts.client';
|
||||
|
||||
import { isTextContentFragment } from '~/common/stores/chat/chat.fragments';
|
||||
|
||||
@@ -59,6 +59,6 @@ export class PersonaChatMessageSpeak implements PersonaProcessorInterface {
|
||||
console.log('📢 TTS:', text);
|
||||
this.spokenLine = true;
|
||||
// fire/forget: we don't want to stall this loop
|
||||
void elevenLabsSpeakText(text, undefined, false, true);
|
||||
void speakText(text, { streaming: false, turbo: true });
|
||||
}
|
||||
}
|
||||
|
||||
+5
-1
@@ -14,7 +14,11 @@ export type SystemPurposeData = {
|
||||
examples?: SystemPurposeExample[];
|
||||
highlighted?: boolean;
|
||||
call?: { starters?: string[] };
|
||||
voices?: { elevenLabs?: { voiceId: string } };
|
||||
voices?: {
|
||||
tts?: { voiceId?: string };
|
||||
// Legacy support for existing configs
|
||||
elevenLabs?: { voiceId: string };
|
||||
};
|
||||
};
|
||||
|
||||
export type SystemPurposeExample = string | { prompt: string, action?: 'require-data-attachment' };
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
|
||||
|
||||
import type { DTTSService, TTSGenerationOptions, TTSSpeakResult, TTSVendorId, TTSVoice } from './tts.types';
|
||||
|
||||
|
||||
/**
 * Contract that every Text-to-Speech provider implements.
 *
 * Plays the same role for TTS that IModelVendor plays for LLMs.
 *
 * @typeParam TServiceSettings - shape of the per-service settings kept for this vendor
 * @typeParam TAccess - transport/credential object handed to the RPC methods
 */
export interface ITTSVendor<TServiceSettings extends Record<string, any> = {}, TAccess = unknown> {

  /// Identity and presentation ///

  readonly id: TTSVendorId;
  readonly name: string;
  /** Ordering key used when listing vendors in the UI. */
  readonly displayRank: number;
  readonly location: 'local' | 'cloud';
  readonly brandColor?: string;

  /** Backend capability flag used to detect a server-side configuration, if any. */
  readonly hasServerConfigKey?: keyof BackendCapabilities;

  /** Feature flags advertised by the vendor. */
  readonly capabilities: {
    streaming: boolean;
    voiceCloning?: boolean;
    speedControl?: boolean;
    listVoices: boolean;
  };


  /// Abstraction interface ///

  /** Produces the default settings for a newly created service. */
  initializeSetup?(): TServiceSettings;

  /** Client-side check of a service's settings. */
  validateSetup?(setup: TServiceSettings): boolean;

  /** Derives the transport access object from (possibly partial) settings. */
  getTransportAccess(setup?: Partial<TServiceSettings>): TAccess;

  /**
   * RPC: synthesizes speech for `options.text` through this vendor.
   * Resolves to an async iterable of vendor-emitted pieces.
   */
  rpcSpeak(
    access: TAccess,
    options: TTSGenerationOptions,
  ): Promise<AsyncIterable<any>>;

  /** RPC: lists the voices offered by the vendor (when supported). */
  rpcListVoices?(access: TAccess): Promise<{ voices: TTSVoice[] }>;

}
|
||||
@@ -0,0 +1,195 @@
|
||||
import * as z from 'zod/v4';
|
||||
|
||||
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
|
||||
import { env } from '~/server/env';
|
||||
import { fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
|
||||
// Limits
const SAFETY_TEXT_LENGTH = 4096; // OpenAI limit
const MIN_CHUNK_SIZE = 4096; // Minimum chunk size in bytes for streaming


// Input schemas

/** Credentials and endpoint overrides; every field is optional at the schema level. */
const openaiTTSAccessSchema = z.object({
  oaiKey: z.string().optional(),
  oaiHost: z.string().optional(),
  oaiOrgId: z.string().optional(),
});

/** Input accepted by the `speech` procedure. */
export const openaiTTSSpeechInputSchema = z.object({
  access: openaiTTSAccessSchema,
  text: z.string(),
  voice: z.enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']).default('alloy'),
  model: z.enum(['tts-1', 'tts-1-hd']).default('tts-1'),
  speed: z.number().min(0.25).max(4.0).optional(),
  format: z.enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm']).optional(),
  streaming: z.boolean().default(false),
});

/** Parsed (post-defaults) shape of the `speech` input. */
export type OpenAITTSSpeechInputSchema = z.infer<typeof openaiTTSSpeechInputSchema>;
|
||||
|
||||
|
||||
export const openaiTTSRouter = createTRPCRouter({

  /**
   * Speech synthesis procedure using the OpenAI TTS API.
   *
   * Implemented as an async generator that yields, in order:
   *  - `{ control: 'start' }`
   *  - optionally `{ warningMessage }` when the text had to be truncated
   *  - either one `{ audio: { base64, contentType } }` (non-streaming)
   *    or a sequence of `{ audioChunk: { base64 } }` (streaming)
   *  - `{ control: 'end' }` on success, or `{ errorMessage }` on failure
   *
   * Failures are always reported as an `errorMessage` piece rather than thrown.
   */
  speech: publicProcedure
    .input(openaiTTSSpeechInputSchema)
    .mutation(async function* ({ input, ctx }) {

      // Start streaming back
      yield { control: 'start' };

      // Safety check: trim text that's too long
      let text = input.text;
      if (text.length > SAFETY_TEXT_LENGTH) {
        text = text.slice(0, SAFETY_TEXT_LENGTH);
        yield { warningMessage: 'text was truncated to maximum length' };
      }

      let response: Response;
      try {

        // Prepare the upstream request (throws when no API key is available)
        const { headers, url } = openaiTTSAccess(input.access);
        const body: OpenAITTSWire.TTSRequest = {
          input: text,
          voice: input.voice,
          model: input.model,
          response_format: input.format || 'mp3',
          ...(input.speed ? { speed: input.speed } : {}),
        };

        // Blocking fetch
        response = await fetchResponseOrTRPCThrow({
          url,
          method: 'POST',
          headers,
          body,
          signal: ctx.reqSignal,
          name: 'OpenAI TTS',
        });

      } catch (error: any) {
        yield { errorMessage: `fetch issue: ${error?.message || 'Unknown error'}` };
        return;
      }

      // If not streaming, return the entire audio
      if (!input.streaming) {
        let audioArrayBuffer: ArrayBuffer;
        try {
          // downloading the body can fail (abort, network): report it like the other paths
          audioArrayBuffer = await response.arrayBuffer();
        } catch (error: any) {
          yield { errorMessage: `download issue: ${error?.message || 'Unknown error'}` };
          return;
        }
        yield {
          audio: {
            base64: Buffer.from(audioArrayBuffer).toString('base64'),
            contentType: response.headers.get('content-type') || 'audio/mpeg',
          },
        };
        yield { control: 'end' };
        return;
      }

      const reader = response.body?.getReader();
      if (!reader) {
        yield { errorMessage: 'stream issue: No reader' };
        return;
      }

      // STREAM the audio chunks back to the client
      let streamCompleted = false;
      try {

        // Buffer small upstream reads until at least MIN_CHUNK_SIZE bytes are available
        const accumulatedChunks: Uint8Array[] = [];
        let accumulatedSize = 0;

        // Read loop
        while (true) {
          const { value, done: readerDone } = await reader.read();
          if (readerDone) break;
          if (!value) continue;

          accumulatedChunks.push(value);
          accumulatedSize += value.length;

          if (accumulatedSize >= MIN_CHUNK_SIZE) {
            yield {
              audioChunk: {
                base64: Buffer.concat(accumulatedChunks).toString('base64'),
              },
            };
            // Reset the accumulation
            accumulatedChunks.length = 0;
            accumulatedSize = 0;
          }
        }

        // Flush whatever is left below the chunk threshold
        if (accumulatedSize) {
          yield {
            audioChunk: {
              base64: Buffer.concat(accumulatedChunks).toString('base64'),
            },
          };
        }

        streamCompleted = true;

      } catch (error: any) {
        yield { errorMessage: `stream issue: ${error?.message || 'Unknown error'}` };
        return;
      } finally {
        // On error or early consumer exit, release the upstream body instead of leaving it open
        if (!streamCompleted)
          void reader.cancel().catch(() => { /* best-effort cleanup */ });
      }

      // End streaming
      yield { control: 'end' };
    }),

});
|
||||
|
||||
|
||||
/**
 * Builds the request headers and endpoint URL for the OpenAI speech API.
 *
 * Values from `access` take precedence over the server environment; blank
 * (empty or whitespace-only) values are treated as missing so the fallback applies.
 *
 * @param access - optional API key, host and organization id provided by the caller
 * @returns the headers to send and the full `/v1/audio/speech` URL
 * @throws Error when no API key is available from `access` or the environment
 */
export function openaiTTSAccess(access: OpenAITTSSpeechInputSchema['access']): { headers: HeadersInit; url: string } {
  // API key
  const apiKey = access.oaiKey?.trim() || env.OPENAI_API_KEY?.trim() || '';
  if (!apiKey)
    throw new Error('Missing OpenAI API key.');

  // API host: add a scheme only when none is present (a bare 'http' prefix check
  // would misread hosts such as 'httpbin.example.com'), then drop trailing slashes
  let host = access.oaiHost?.trim() || env.OPENAI_API_HOST?.trim() || 'api.openai.com';
  if (!/^https?:\/\//i.test(host))
    host = `https://${host}`;
  host = host.replace(/\/+$/, '');

  // Build headers
  const headers: Record<string, string> = {
    'Accept': 'audio/*',
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiKey}`,
  };

  // Add org ID if provided
  const orgId = access.oaiOrgId?.trim();
  if (orgId)
    headers['OpenAI-Organization'] = orgId;

  return {
    headers,
    url: `${host}/v1/audio/speech`,
  };
}
|
||||
|
||||
|
||||
/// Wire types for the OpenAI TTS API
export namespace OpenAITTSWire {

  /** JSON body sent to the speech endpoint. */
  export interface TTSRequest {
    /** Text to synthesize. */
    input: string;
    voice:
      | 'alloy'
      | 'echo'
      | 'fable'
      | 'onyx'
      | 'nova'
      | 'shimmer';
    model:
      | 'tts-1'
      | 'tts-1-hd';
    response_format?:
      | 'mp3'
      | 'opus'
      | 'aac'
      | 'flac'
      | 'wav'
      | 'pcm';
    /** Playback speed, 0.25 to 4.0. */
    speed?: number;
  }

}
|
||||
@@ -0,0 +1,110 @@
|
||||
//
|
||||
// WARNING: Everything here is data at rest. Know what you're doing.
|
||||
//
|
||||
|
||||
import { create } from 'zustand';
|
||||
import { persist } from 'zustand/middleware';
|
||||
|
||||
import type { ITTSVendor } from './ITTSVendor';
|
||||
import type { DTTSService, TTSServiceId, TTSVendorId } from './tts.types';
|
||||
|
||||
|
||||
/// TTSStore - holds the configured TTS services and the global selection

/** Persisted data of the TTS store. */
export interface TTSStoreState {

  /** Configured instances of TTS vendors. */
  services: DTTSService<any>[];

  /** Globally selected service, or null when none is selected. */
  activeServiceId: TTSServiceId | null;

  /** Globally selected voice, or null when none is selected. */
  activeVoiceId: string | null;

}

/** Mutators exposed by the TTS store. */
interface TTSStoreActions {

  // service management
  createService: (vendor: ITTSVendor) => DTTSService;
  removeService: (id: TTSServiceId) => void;
  updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) => void;

  // active selection
  setActiveServiceId: (id: TTSServiceId | null) => void;
  setActiveVoiceId: (voiceId: string | null) => void;

}

type TTSStore = TTSStoreState & TTSStoreActions;
|
||||
|
||||
|
||||
/**
 * Zustand store for TTS services, persisted under the 'app-tts' key.
 *
 * Holds the list of configured services plus the globally active service and voice.
 */
export const useTTSStore = create<TTSStore>()(persist(
  (set, get) => ({

    // Initial state
    services: [],
    activeServiceId: null,
    activeVoiceId: null,

    // Actions

    /** Creates a service for `vendor` with its default setup, appends it, and returns it. */
    createService: (vendor: ITTSVendor) => {
      const service: DTTSService = {
        id: `${vendor.id}-${Date.now()}`,
        label: vendor.name,
        vId: vendor.id,
        setup: vendor.initializeSetup?.() || {},
      };

      set(state => ({
        services: [...state.services, service],
      }));

      return service;
    },

    /** Removes the service with the given id; the active selection is cleared if it pointed to it. */
    removeService: (id: TTSServiceId) =>
      set(state => {
        const newServices = state.services.filter(s => s.id !== id);
        return {
          services: newServices,
          // Clear active service if it was removed
          activeServiceId: state.activeServiceId === id ? null : state.activeServiceId,
        };
      }),

    /** Shallow-merges `partialSettings` into the setup of the matching service. */
    updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) =>
      set(state => ({
        services: state.services.map(service =>
          service.id === id
            ? { ...service, setup: { ...service.setup, ...partialSettings } }
            : service,
        ),
      })),

    setActiveServiceId: (id: TTSServiceId | null) =>
      set({ activeServiceId: id }),

    setActiveVoiceId: (voiceId: string | null) =>
      set({ activeVoiceId: voiceId }),

  }),
  {
    // persistence options (the previous text closed `persist(` here and then
    // closed it again on the final line, leaving an unbalanced parenthesis)
    name: 'app-tts',
  },
));
|
||||
|
||||
|
||||
// Helper functions for accessing TTS store
|
||||
|
||||
/** Returns the current snapshot of the TTS store (non-reactive). */
export function getTTSStoreState(): TTSStoreState {
  const snapshot: TTSStoreState = useTTSStore.getState();
  return snapshot;
}
|
||||
|
||||
/** Looks up a configured service by id; returns null when there is no match. */
export function getTTSService(serviceId: TTSServiceId): DTTSService | null {
  const match = useTTSStore.getState().services.find(candidate => candidate.id === serviceId);
  return match ? match : null;
}
|
||||
|
||||
/** Returns the globally active service, or null when none is selected or it no longer exists. */
export function getActiveTTSService(): DTTSService | null {
  const state = useTTSStore.getState();
  const activeId = state.activeServiceId;
  if (!activeId)
    return null;
  const active = state.services.find(candidate => candidate.id === activeId);
  return active ? active : null;
}
|
||||
@@ -0,0 +1,195 @@
|
||||
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
|
||||
|
||||
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
|
||||
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
|
||||
import { convert_Base64_To_UInt8Array } from '~/common/util/blobUtils';
|
||||
import { useUIPreferencesStore } from '~/common/stores/store-ui';
|
||||
|
||||
import { SystemPurposes, type SystemPurposeId } from '~/data';
|
||||
|
||||
import { findTTSVendor } from './vendors.registry';
|
||||
import { getActiveTTSService, getTTSService, useTTSStore } from './store-tts.ts';
|
||||
import type { TTSGenerationOptions, TTSSpeakResult, TTSServiceId } from './tts.types';
|
||||
|
||||
|
||||
/**
 * Resolves the TTS preferences declared by a persona.
 *
 * The `voices.tts` entry wins; the legacy `voices.elevenLabs` entry is used
 * only when the former has no voice id. Returns null when the persona is
 * unknown, declares no voices, or neither entry carries a voice id.
 */
function getPersonaTTSConfig(personaId?: SystemPurposeId): { serviceId?: TTSServiceId; voiceId?: string } | null {
  const voices = personaId ? SystemPurposes[personaId]?.voices : undefined;
  if (!voices)
    return null;

  // new field first, legacy field second
  const voiceId = voices.tts?.voiceId || voices.elevenLabs?.voiceId;
  return voiceId ? { voiceId } : null;
}
|
||||
|
||||
|
||||
/**
 * Vendor-agnostic entry point: synthesizes `text` with the resolved TTS service and plays it.
 *
 * Service and voice are each resolved in this order: persona preference,
 * explicit option, global selection from the TTS store.
 *
 * @param text - text to speak; blank text yields `{ success: false }` without calling any vendor
 * @param options - optional service/voice/persona selection and generation flags
 * @returns `success` reflects whether playback was started; `audioBase64` is set
 *          when a full audio buffer was received and streaming was not requested
 */
export async function speakText(
  text: string,
  options?: {
    serviceId?: TTSServiceId; // Override global service
    voiceId?: string; // Override global voice
    personaId?: SystemPurposeId; // Use persona's voice preference
    streaming?: boolean;
    turbo?: boolean;
    speed?: number;
  },
): Promise<TTSSpeakResult> {

  // nothing to say
  if (!text?.trim())
    return { success: false };

  // 1. Resolve service and voice: persona > explicit option > global default
  const { activeServiceId, activeVoiceId } = useTTSStore.getState();
  const personaConfig = getPersonaTTSConfig(options?.personaId);
  const serviceId = personaConfig?.serviceId || options?.serviceId || activeServiceId || undefined;
  const voiceId = personaConfig?.voiceId || options?.voiceId || activeVoiceId || undefined;

  if (!serviceId) {
    console.warn('TTS: No service configured');
    return { success: false };
  }

  const service = getTTSService(serviceId);
  if (!service) {
    console.warn('TTS: Service not found:', serviceId);
    return { success: false };
  }

  // 2. Get vendor implementation
  const vendor = findTTSVendor(service.vId);
  if (!vendor) {
    console.warn('TTS: Vendor not found:', service.vId);
    return { success: false };
  }

  // 3. Get transport access
  const access = vendor.getTransportAccess(service.setup);

  // 4. Prepare generation options; any language not starting with 'en' counts as non-English
  const { preferredLanguage } = useUIPreferencesStore.getState();
  const generationOptions: TTSGenerationOptions = {
    text,
    voiceId,
    streaming: options?.streaming ?? false,
    turbo: options?.turbo ?? false,
    speed: options?.speed,
    nonEnglish: !(preferredLanguage?.toLowerCase()?.startsWith('en')),
  };

  // 5. Execute TTS and play back the pieces as they arrive
  try {
    const stream = await vendor.rpcSpeak(access, generationOptions);

    let liveAudioPlayer: AudioLivePlayer | undefined;
    let playbackStarted = false;
    let audioBase64: string | undefined;

    for await (const piece of stream) {

      // Streaming audio chunk: feed the (lazily created) live player
      if (piece.audioChunk) {
        try {
          liveAudioPlayer = liveAudioPlayer ?? new AudioLivePlayer();
          const chunkArray = convert_Base64_To_UInt8Array(piece.audioChunk.base64, 'tts.client (chunk)');
          liveAudioPlayer.enqueueChunk(chunkArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio chunk error:', audioError);
          return { success: false };
        }
        continue;
      }

      // Full audio buffer: play it in one go
      if (piece.audio) {
        try {
          if (!options?.streaming)
            audioBase64 = piece.audio.base64;
          const audioArray = convert_Base64_To_UInt8Array(piece.audio.base64, 'tts.client');
          void AudioPlayer.playBuffer(audioArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio buffer error:', audioError);
          return { success: false };
        }
        continue;
      }

      // Errors stop the playback loop
      if (piece.errorMessage) {
        console.error('TTS error:', piece.errorMessage);
        return { success: false, error: piece.errorMessage };
      }

      // Warnings are logged; control pieces ('start' / 'end') need no handling
      if (piece.warningMessage)
        console.warn('TTS warning:', piece.warningMessage);
    }

    return { success: playbackStarted, audioBase64 };

  } catch (error) {
    console.error('TTS playback error:', error);
    return { success: false, error: String(error) };
  }
}
|
||||
|
||||
|
||||
/**
 * Tells whether TTS can be used.
 *
 * Returns true when the active service exists, its vendor is registered, and
 * the vendor's `validateSetup` (if it has one) accepts the service setup.
 * Otherwise falls back to the backend's `hasVoiceElevenLabs` capability.
 */
export function isTTSAvailable(): boolean {
  const { services, activeServiceId } = useTTSStore.getState();

  // Check if we have an active, usable service
  const service = activeServiceId
    ? services.find(s => s.id === activeServiceId)
    : undefined;
  if (service) {
    const vendor = findTTSVendor(service.vId);
    // A missing vendor must not count as valid: `undefined !== false` used to pass here.
    // A vendor without validateSetup is still considered valid.
    if (vendor && (vendor.validateSetup?.(service.setup) ?? true))
      return true;
  }

  // Check backend capabilities for server-side TTS
  const caps = getBackendCapabilities();
  return caps.hasVoiceElevenLabs;
}
|
||||
@@ -0,0 +1,87 @@
|
||||
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
|
||||
import { getElevenLabsData } from '~/modules/elevenlabs/store-module-elevenlabs';
|
||||
import { useModelsStore } from '~/common/stores/llms/store-llms';
|
||||
|
||||
import { findTTSVendor } from './vendors.registry';
|
||||
import { useTTSStore } from './store-tts';
|
||||
import type { TTSVendorId } from './tts.types';
|
||||
|
||||
|
||||
/**
 * One-time migration of pre-existing TTS configuration into the TTS store.
 * Intended to be called once on app initialization.
 *
 * Does nothing when the store already contains services. Otherwise it creates
 * an ElevenLabs service when a client key or server-side support is present,
 * makes it active, and then runs the auto-import from LLM services.
 */
export function migrateTTSServices() {
  const ttsStore = useTTSStore.getState();

  // already migrated: at least one service exists
  if (ttsStore.services.length > 0)
    return;

  // 1. ElevenLabs: carry over the existing key and voice
  const { elevenLabsApiKey, elevenLabsVoiceId } = getElevenLabsData();
  const { hasVoiceElevenLabs } = getBackendCapabilities();

  const elevenLabsVendor = (elevenLabsApiKey || hasVoiceElevenLabs)
    ? findTTSVendor('elevenlabs')
    : null;

  if (elevenLabsVendor) {
    const service = ttsStore.createService(elevenLabsVendor);

    // existing credentials, if any
    if (elevenLabsApiKey)
      ttsStore.updateServiceSettings(service.id, { elevenKey: elevenLabsApiKey });

    // the migrated service becomes the active one
    ttsStore.setActiveServiceId(service.id);

    // previously selected voice, if any
    if (elevenLabsVoiceId)
      ttsStore.setActiveVoiceId(elevenLabsVoiceId);

    console.log('TTS: Migrated ElevenLabs configuration to new TTS store');
  }

  // 2. OpenAI: import from the LLM configuration
  autoImportTTSFromLLMs();
}
|
||||
|
||||
|
||||
/**
 * Creates TTS services from compatible LLM service credentials.
 *
 * Currently handles OpenAI only: when an OpenAI LLM service has an API key and
 * no OpenAI TTS service with the same key exists yet, a new TTS service is
 * created with the key, host and organization id copied over.
 */
export function autoImportTTSFromLLMs() {
  const { sources } = useModelsStore.getState();
  const { services } = useTTSStore.getState();

  // the first OpenAI LLM service, if it carries a key
  const openaiLLMService = sources.find(s => s.vId === 'openai');
  if (!openaiLLMService || !openaiLLMService.setup?.oaiKey)
    return;

  // skip when a TTS service already uses this key
  const alreadyImported = services.some(
    s => s.vId === 'openai' && s.setup.oaiKey === openaiLLMService.setup.oaiKey,
  );
  if (alreadyImported)
    return;

  const openaiTTSVendor = findTTSVendor('openai');
  if (!openaiTTSVendor)
    return;

  // create the service, then copy the credentials from the LLM service
  const store = useTTSStore.getState();
  const service = store.createService(openaiTTSVendor);
  store.updateServiceSettings(service.id, {
    oaiKey: openaiLLMService.setup.oaiKey,
    oaiHost: openaiLLMService.setup.oaiHost,
    oaiOrgId: openaiLLMService.setup.oaiOrgId,
  });

  console.log('TTS: Auto-imported OpenAI TTS service from LLM configuration');
}
|
||||
@@ -0,0 +1,65 @@
|
||||
//
|
||||
// TTS Core Types
|
||||
//
|
||||
|
||||
/** Identifier of a configured TTS service instance. */
export type TTSServiceId = string;

/** Identifiers of the known TTS vendors. */
export type TTSVendorId =
  | 'elevenlabs'
  | 'openai';

/** Audio output formats a TTS service may produce. */
export type TTSAudioFormat =
  | 'mp3'
  | 'opus'
  | 'aac'
  | 'flac'
  | 'wav'
  | 'pcm';

/** A voice, described uniformly regardless of vendor. */
export interface TTSVoice {
  id: string;
  name: string;
  description?: string;
  previewUrl?: string;
  language?: string;
  category?: string;
}

/** Generation options: the union of what the vendors accept. */
export interface TTSGenerationOptions {

  // core (all vendors)
  text: string;
  voiceId?: string;

  // common, optional
  /** Speed multiplier, 0.25-4.0 (OpenAI TTS). */
  speed?: number;
  /** Output audio format. */
  format?: TTSAudioFormat;
  /** Enable streaming. */
  streaming?: boolean;

  // vendor-specific, optional
  /** ElevenLabs: use turbo model. */
  turbo?: boolean;
  /** ElevenLabs: use multilingual model. */
  nonEnglish?: boolean;

}

/** Outcome of a speak request. */
export interface TTSSpeakResult {
  success: boolean;
  /** Available when not streaming. */
  audioBase64?: string;
  error?: string;
}

/** A configured instance of a TTS vendor. */
export interface DTTSService<TServiceSettings extends object = {}> {
  id: TTSServiceId;
  label: string;

  /** The vendor this service belongs to. */
  vId: TTSVendorId;

  /** Service-specific settings. */
  setup: Partial<TServiceSettings>;
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { TTSVendorElevenLabs } from './vendors/elevenlabs/elevenlabs.vendor';
|
||||
import { TTSVendorOpenAI } from './vendors/openai/openai-tts.vendor';
|
||||
|
||||
import type { ITTSVendor } from './ITTSVendor';
|
||||
import type { TTSVendorId } from './tts.types';
|
||||
|
||||
|
||||
/**
 * Global registry of TTS vendor instances, keyed by vendor id.
 */
const TTS_VENDOR_REGISTRY: Record<TTSVendorId, ITTSVendor> = {
  elevenlabs: TTSVendorElevenLabs,
  openai: TTSVendorOpenAI,
} as Record<string, ITTSVendor>;
|
||||
|
||||
|
||||
/** Returns all registered vendors as a new array, ordered by ascending `displayRank`. */
export function findAllTTSVendors(): ITTSVendor[] {
  // Object.values yields a fresh array, so the in-place sort leaves the registry untouched
  return Object.values(TTS_VENDOR_REGISTRY)
    .sort((left, right) => left.displayRank - right.displayRank);
}
|
||||
|
||||
/** Returns the vendor registered under `vendorId`, or null when the id is missing or unknown. */
export function findTTSVendor<TServiceSettings extends object = {}, TAccess = unknown>(
  vendorId?: TTSVendorId,
): ITTSVendor<TServiceSettings, TAccess> | null {
  if (!vendorId)
    return null;
  const vendor = TTS_VENDOR_REGISTRY[vendorId] as ITTSVendor<TServiceSettings, TAccess>;
  return vendor ?? null;
}
|
||||
@@ -0,0 +1,82 @@
|
||||
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
|
||||
|
||||
import { apiStream } from '~/common/util/trpc.client';
|
||||
|
||||
import type { ITTSVendor } from '../../ITTSVendor';
|
||||
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
|
||||
|
||||
|
||||
/** Settings stored for an ElevenLabs service. */
export interface ElevenLabsServiceSettings {
  elevenKey?: string;
  elevenHost?: string;
}

/** Access object handed to the ElevenLabs RPC calls. */
export interface ElevenLabsAccess {
  elevenKey?: string;
  elevenHost?: string;
}
|
||||
|
||||
|
||||
/** ElevenLabs implementation of the TTS vendor contract. */
export const TTSVendorElevenLabs: ITTSVendor<ElevenLabsServiceSettings, ElevenLabsAccess> = {
  id: 'elevenlabs',
  name: 'ElevenLabs',
  displayRank: 10,
  location: 'cloud',
  brandColor: undefined,

  hasServerConfigKey: 'hasVoiceElevenLabs',

  capabilities: {
    streaming: true,
    voiceCloning: true,
    speedControl: false,
    listVoices: true,
  },

  /** Default setup: empty key and host. */
  initializeSetup(): ElevenLabsServiceSettings {
    return { elevenKey: '', elevenHost: '' };
  },

  /** An absent/empty key is accepted; a provided key must be at least 32 characters after trimming. */
  validateSetup(setup: ElevenLabsServiceSettings): boolean {
    const key = setup.elevenKey;
    if (!key)
      return true;
    return key.trim().length >= 32;
  },

  /** Copies key and host from the setup into the access object. */
  getTransportAccess(setup?: Partial<ElevenLabsServiceSettings>): ElevenLabsAccess {
    const settings: Partial<ElevenLabsServiceSettings> = setup ?? {};
    return {
      elevenKey: settings.elevenKey,
      elevenHost: settings.elevenHost,
    };
  },

  /** Forwards the request to the `elevenlabs.speech` mutation; flags default to false. */
  async rpcSpeak(access: ElevenLabsAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    const { text, voiceId, nonEnglish, streaming, turbo } = options;
    return apiStream.elevenlabs.speech.mutate({
      xiKey: access.elevenKey,
      voiceId,
      text,
      nonEnglish: nonEnglish ?? false,
      audioStreaming: streaming ?? false,
      audioTurbo: turbo ?? false,
    });
  },

  /** Queries `elevenlabs.listVoices` and maps the result onto the unified voice shape. */
  async rpcListVoices(access: ElevenLabsAccess): Promise<{ voices: TTSVoice[] }> {
    // NOTE(review): the client is cast to `any` here, so this call is not type-checked - confirm the route exists on apiStream
    const result = await (apiStream as any).elevenlabs.listVoices.query({
      elevenKey: access.elevenKey,
    });

    const voices = result.voices.map((v: any) => ({
      id: v.id,
      name: v.name,
      description: v.description || undefined,
      previewUrl: v.previewUrl || undefined,
      category: v.category,
    }));
    return { voices };
  },
};
|
||||
@@ -0,0 +1,86 @@
|
||||
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
|
||||
|
||||
import { apiStream } from '~/common/util/trpc.client';
|
||||
|
||||
import type { ITTSVendor } from '../../ITTSVendor';
|
||||
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
|
||||
|
||||
|
||||
/** Settings stored for an OpenAI TTS service. */
export interface OpenAITTSServiceSettings {
  oaiKey?: string;
  oaiHost?: string;
  oaiOrgId?: string;
}

/** Access object handed to the OpenAI TTS RPC calls. */
export interface OpenAITTSAccess {
  oaiKey?: string;
  oaiHost?: string;
  oaiOrgId?: string;
}
|
||||
|
||||
/** The fixed list of OpenAI TTS voices. */
export const OPENAI_TTS_VOICES: TTSVoice[] = [
  {
    id: 'alloy',
    name: 'Alloy',
    description: 'Neutral and balanced',
  },
  {
    id: 'echo',
    name: 'Echo',
    description: 'Clear and articulate',
  },
  {
    id: 'fable',
    name: 'Fable',
    description: 'Expressive and warm',
  },
  {
    id: 'onyx',
    name: 'Onyx',
    description: 'Deep and authoritative',
  },
  {
    id: 'nova',
    name: 'Nova',
    description: 'Friendly and conversational',
  },
  {
    id: 'shimmer',
    name: 'Shimmer',
    description: 'Soft and gentle',
  },
];
|
||||
|
||||
|
||||
// Voice ids accepted by the speech procedure
const OPENAI_TTS_VOICE_IDS = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'] as const;
type OpenAITTSVoiceId = typeof OPENAI_TTS_VOICE_IDS[number];

/**
 * Maps an arbitrary voice id onto a valid OpenAI voice, defaulting to 'alloy'.
 * The incoming id is a free-form string and may not be an OpenAI voice at all.
 */
function resolveOpenAITTSVoice(voiceId?: string): OpenAITTSVoiceId {
  return OPENAI_TTS_VOICE_IDS.find(id => id === voiceId) ?? 'alloy';
}


/** OpenAI implementation of the TTS vendor contract. */
export const TTSVendorOpenAI: ITTSVendor<OpenAITTSServiceSettings, OpenAITTSAccess> = {
  id: 'openai',
  name: 'OpenAI TTS',
  displayRank: 20,
  location: 'cloud',
  brandColor: '#10a37f',

  hasServerConfigKey: 'hasLlmOpenAI',

  capabilities: {
    streaming: true,
    voiceCloning: false,
    speedControl: true,
    listVoices: true,
  },

  /** Default setup: empty key, host and organization id. */
  initializeSetup(): OpenAITTSServiceSettings {
    return {
      oaiKey: '',
      oaiHost: '',
      oaiOrgId: '',
    };
  },

  /** An absent/empty key is accepted; a provided key must start with 'sk-' after trimming. */
  validateSetup(setup: OpenAITTSServiceSettings): boolean {
    return !setup.oaiKey || setup.oaiKey.trim().startsWith('sk-');
  },

  /** Copies key, host and organization id from the setup into the access object. */
  getTransportAccess(setup?: Partial<OpenAITTSServiceSettings>): OpenAITTSAccess {
    return {
      oaiKey: setup?.oaiKey,
      oaiHost: setup?.oaiHost,
      oaiOrgId: setup?.oaiOrgId,
    };
  },

  /**
   * Forwards the request to the `tts.openai.speech` mutation using the 'tts-1' model.
   * Unknown voice ids are replaced with the default voice so the request is not
   * rejected for carrying a voice this vendor does not offer.
   */
  async rpcSpeak(access: OpenAITTSAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    return apiStream.tts.openai.speech.mutate({
      access,
      text: options.text,
      voice: resolveOpenAITTSVoice(options.voiceId),
      model: 'tts-1',
      speed: options.speed,
      format: options.format,
      streaming: options.streaming ?? false,
    });
  },

  /** Returns the fixed list of OpenAI voices; `access` is not used. */
  async rpcListVoices(access: OpenAITTSAccess): Promise<{ voices: TTSVoice[] }> {
    // OpenAI has a fixed set of voices
    return { voices: OPENAI_TTS_VOICES };
  },
};
|
||||
@@ -8,6 +8,7 @@ import { llmAnthropicRouter } from '~/modules/llms/server/anthropic/anthropic.ro
|
||||
import { llmGeminiRouter } from '~/modules/llms/server/gemini/gemini.router';
|
||||
import { llmOllamaRouter } from '~/modules/llms/server/ollama/ollama.router';
|
||||
import { llmOpenAIRouter } from '~/modules/llms/server/openai/openai.router';
|
||||
import { openaiTTSRouter } from '~/modules/tts/server/openai-tts.router';
|
||||
import { youtubeRouter } from '~/modules/youtube/youtube.router';
|
||||
|
||||
/**
|
||||
@@ -22,6 +23,9 @@ export const appRouterEdge = createTRPCRouter({
|
||||
llmGemini: llmGeminiRouter,
|
||||
llmOllama: llmOllamaRouter,
|
||||
llmOpenAI: llmOpenAIRouter,
|
||||
tts: createTRPCRouter({
|
||||
openai: openaiTTSRouter,
|
||||
}),
|
||||
youtube: youtubeRouter,
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user