Compare commits

...

1 Commit

Author SHA1 Message Date
claude[bot] 04a83247ee feat: Implement TTS vendor abstraction system
Adds support for multiple TTS providers (OpenAI, ElevenLabs) with vendor abstraction pattern similar to LLM vendors.

Core changes:
- Created /src/modules/tts/ module with vendor abstraction
- Implemented ITTSVendor interface for unified TTS API
- Added vendor implementations for ElevenLabs and OpenAI TTS
- Created store-tts.ts for service and voice configuration
- Implemented unified tts.client.ts for vendor-agnostic speech
- Added OpenAI TTS tRPC router with streaming support
- Updated PersonaChatMessageSpeak to use new TTS client
- Added migration logic for existing ElevenLabs configs
- Updated data.ts to support new voice configuration format

Technical details:
- Service-scoped pattern: activeServiceId + activeVoiceId
- Backward compatible with existing elevenLabs voice configs
- Auto-import capability from LLM configurations
- Supports streaming and non-streaming TTS
- Vendor-specific features handled gracefully

Relates to #858

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Enrico Ros <enricoros@users.noreply.github.com>
2025-10-29 13:31:32 +00:00
12 changed files with 913 additions and 3 deletions
@@ -1,4 +1,4 @@
import { elevenLabsSpeakText } from '~/modules/elevenlabs/elevenlabs.client';
import { speakText } from '~/modules/tts/tts.client';
import { isTextContentFragment } from '~/common/stores/chat/chat.fragments';
@@ -59,6 +59,6 @@ export class PersonaChatMessageSpeak implements PersonaProcessorInterface {
console.log('📢 TTS:', text);
this.spokenLine = true;
// fire/forget: we don't want to stall this loop
void elevenLabsSpeakText(text, undefined, false, true);
void speakText(text, { streaming: false, turbo: true });
}
}
+5 -1
View File
@@ -14,7 +14,11 @@ export type SystemPurposeData = {
examples?: SystemPurposeExample[];
highlighted?: boolean;
call?: { starters?: string[] };
voices?: { elevenLabs?: { voiceId: string } };
voices?: {
tts?: { voiceId?: string };
// Legacy support for existing configs
elevenLabs?: { voiceId: string };
};
};
export type SystemPurposeExample = string | { prompt: string, action?: 'require-data-attachment' };
+57
View File
@@ -0,0 +1,57 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import type { DTTSService, TTSGenerationOptions, TTSSpeakResult, TTSVendorId, TTSVoice } from './tts.types';
/**
 * TTS Vendor Interface - abstraction for all TTS providers.
 * Similar to IModelVendor but adapted for TTS services.
 *
 * A vendor is a stateless singleton describing one provider (ElevenLabs,
 * OpenAI, ...); per-user configuration lives in a DTTSService whose `setup`
 * is typed by TServiceSettings, and is converted to a transport-level
 * TAccess object before RPC calls.
 */
export interface ITTSVendor<TServiceSettings extends Record<string, any> = {}, TAccess = unknown> {
  readonly id: TTSVendorId;
  readonly name: string;
  readonly displayRank: number; // Display order in UI (ascending)
  readonly location: 'local' | 'cloud';
  readonly brandColor?: string;

  // Server configuration detection: which backend capability flag signals
  // that the server already holds credentials for this vendor
  readonly hasServerConfigKey?: keyof BackendCapabilities;

  // Capability flags, used by the UI to show/hide vendor-specific controls
  readonly capabilities: {
    streaming: boolean;
    voiceCloning?: boolean;
    speedControl?: boolean;
    listVoices: boolean;
  };

  /// Abstraction interface ///

  /**
   * Initialize default settings for a new service
   */
  initializeSetup?(): TServiceSettings;

  /**
   * Validate service setup (client-side); absent means "always valid"
   */
  validateSetup?(setup: TServiceSettings): boolean;

  /**
   * Get transport access configuration from setup
   */
  getTransportAccess(setup?: Partial<TServiceSettings>): TAccess;

  /**
   * RPC: Speak text using this vendor's TTS service.
   * Resolves to an async iterable of protocol "pieces" (control/audio/
   * audioChunk/error/warning objects) consumed by tts.client.
   */
  rpcSpeak(
    access: TAccess,
    options: TTSGenerationOptions,
  ): Promise<AsyncIterable<any>>;

  /**
   * RPC: List available voices (if supported — see capabilities.listVoices)
   */
  rpcListVoices?(access: TAccess): Promise<{ voices: TTSVoice[] }>;
}
+195
View File
@@ -0,0 +1,195 @@
import * as z from 'zod/v4';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env';
import { fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
// Configuration
const SAFETY_TEXT_LENGTH = 4096; // OpenAI input limit (characters); longer text is truncated with a warning
const MIN_CHUNK_SIZE = 4096; // Minimum chunk size in bytes accumulated before a streaming yield

// Schema definitions

/**
 * Input for the `speech` procedure.
 * `access` optionally overrides the server's environment credentials.
 */
export const openaiTTSSpeechInputSchema = z.object({
  access: z.object({
    oaiKey: z.string().optional(),
    oaiHost: z.string().optional(),
    oaiOrgId: z.string().optional(),
  }),
  text: z.string(),
  voice: z.enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']).default('alloy'),
  model: z.enum(['tts-1', 'tts-1-hd']).default('tts-1'),
  speed: z.number().min(0.25).max(4.0).optional(), // OpenAI accepts 0.25 - 4.0
  format: z.enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm']).optional(),
  streaming: z.boolean().default(false),
});

export type OpenAITTSSpeechInputSchema = z.infer<typeof openaiTTSSpeechInputSchema>;
export const openaiTTSRouter = createTRPCRouter({

  /**
   * Speech synthesis procedure using the OpenAI TTS API.
   *
   * Async-generator mutation that yields a sequence of "pieces":
   *   { control: 'start' } → optional { warningMessage } → either a single
   *   { audio: { base64, contentType } } (non-streaming) or repeated
   *   { audioChunk: { base64 } } (streaming) → { control: 'end' }.
   * An { errorMessage } piece terminates the sequence early.
   */
  speech: publicProcedure
    .input(openaiTTSSpeechInputSchema)
    .mutation(async function* ({ input, ctx }) {

      // Signal stream start to the client
      yield { control: 'start' };

      let text = input.text;
      // Safety check: hard-truncate text beyond the OpenAI input limit
      if (text.length > SAFETY_TEXT_LENGTH) {
        text = text.slice(0, SAFETY_TEXT_LENGTH);
        yield { warningMessage: 'text was truncated to maximum length' };
      }

      let response: Response;
      try {
        // Prepare the upstream request
        const { headers, url } = openaiTTSAccess(input.access);
        const body: OpenAITTSWire.TTSRequest = {
          input: text,
          voice: input.voice,
          model: input.model,
          response_format: input.format || 'mp3',
          // a falsy speed is omitted (schema enforces 0.25-4.0 anyway)
          ...(input.speed ? { speed: input.speed } : {}),
        };
        // Blocking fetch; aborts together with the incoming request via ctx.reqSignal
        response = await fetchResponseOrTRPCThrow({
          url,
          method: 'POST',
          headers,
          body,
          signal: ctx.reqSignal,
          name: 'OpenAI TTS',
        });
      } catch (error: any) {
        yield { errorMessage: `fetch issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // If not streaming, buffer and return the entire audio in one piece
      if (!input.streaming) {
        const audioArrayBuffer = await response.arrayBuffer();
        yield {
          audio: {
            base64: Buffer.from(audioArrayBuffer).toString('base64'),
            contentType: response.headers.get('content-type') || 'audio/mpeg',
          },
        };
        yield { control: 'end' };
        return;
      }

      const reader = response.body?.getReader();
      if (!reader) {
        yield { errorMessage: 'stream issue: No reader' };
        return;
      }

      // STREAM the audio chunks back to the client
      try {
        // Accumulate upstream chunks until MIN_CHUNK_SIZE, to avoid yielding many tiny pieces
        const accumulatedChunks: Uint8Array[] = [];
        let accumulatedSize = 0;

        // Read loop
        while (true) {
          const { value, done: readerDone } = await reader.read();
          if (readerDone) break;
          if (!value) continue;

          // Accumulate chunks
          accumulatedChunks.push(value);
          accumulatedSize += value.length;

          // When accumulated size reaches or exceeds MIN_CHUNK_SIZE, yield the chunk
          if (accumulatedSize >= MIN_CHUNK_SIZE) {
            yield {
              audioChunk: {
                base64: Buffer.concat(accumulatedChunks).toString('base64'),
              },
            };
            // Reset the accumulation
            accumulatedChunks.length = 0;
            accumulatedSize = 0;
          }
        }

        // Flush any remaining tail (smaller than MIN_CHUNK_SIZE)
        if (accumulatedSize) {
          yield {
            audioChunk: {
              base64: Buffer.concat(accumulatedChunks).toString('base64'),
            },
          };
        }
      } catch (error: any) {
        yield { errorMessage: `stream issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // End streaming
      yield { control: 'end' };
    }),

});
/**
 * Helper function to construct OpenAI TTS API access details.
 *
 * Resolves credentials (client-provided values win over server env vars),
 * normalizes the host (adds https://, strips trailing slashes), and builds
 * the request headers.
 *
 * @param access optional client-side overrides for key/host/org
 * @returns headers and the full `/v1/audio/speech` endpoint URL
 * @throws Error when no API key is available from either source
 */
export function openaiTTSAccess(access: OpenAITTSSpeechInputSchema['access']): { headers: Record<string, string>; url: string } {
  // API key: client-provided value wins over the server environment
  const apiKey = (access.oaiKey || env.OPENAI_API_KEY || '').trim();
  if (!apiKey) {
    throw new Error('Missing OpenAI API key.');
  }

  // API host: default to api.openai.com; ensure a scheme is present
  let host = (access.oaiHost || env.OPENAI_API_HOST || 'api.openai.com').trim();
  if (!host.startsWith('http')) {
    host = `https://${host}`;
  }
  // strip ALL trailing slashes (the previous single-slice left 'host//' inputs broken)
  host = host.replace(/\/+$/, '');

  // Build headers — typed as Record<string, string> (not HeadersInit) so the
  // conditional org header below type-checks under strict mode
  const headers: Record<string, string> = {
    'Accept': 'audio/*',
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiKey}`,
  };

  // Add org ID if provided
  if (access.oaiOrgId) {
    headers['OpenAI-Organization'] = access.oaiOrgId;
  }

  return {
    headers,
    url: `${host}/v1/audio/speech`,
  };
}
/// OpenAI TTS API Wire Types

export namespace OpenAITTSWire {

  /** Request body for `POST /v1/audio/speech` (upstream OpenAI API). */
  export interface TTSRequest {
    input: string; // text to synthesize (truncated to SAFETY_TEXT_LENGTH before sending)
    voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
    model: 'tts-1' | 'tts-1-hd';
    response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
    speed?: number; // 0.25 to 4.0
  }
}
+110
View File
@@ -0,0 +1,110 @@
//
// WARNING: Everything here is data at rest. Know what you're doing.
//
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
import type { ITTSVendor } from './ITTSVendor';
import type { DTTSService, TTSServiceId, TTSVendorId } from './tts.types';
/// TTSStore - a store for configured TTS services and settings

export interface TTSStoreState {
  // TTS services (configured instances of TTS vendors)
  services: DTTSService<any>[];
  // Global active service and voice (null = nothing selected)
  activeServiceId: TTSServiceId | null;
  activeVoiceId: string | null;
}

interface TTSStoreActions {
  // Service management
  createService: (vendor: ITTSVendor) => DTTSService;
  removeService: (id: TTSServiceId) => void;
  updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) => void;
  // Active selection
  setActiveServiceId: (id: TTSServiceId | null) => void;
  setActiveVoiceId: (voiceId: string | null) => void;
}

type TTSStore = TTSStoreState & TTSStoreActions;
export const useTTSStore = create<TTSStore>()(persist(
(set, get) => ({
// Initial state
services: [],
activeServiceId: null,
activeVoiceId: null,
// Actions
createService: (vendor: ITTSVendor) => {
const service: DTTSService = {
id: `${vendor.id}-${Date.now()}`,
label: vendor.name,
vId: vendor.id,
setup: vendor.initializeSetup?.() || {},
};
set(state => ({
services: [...state.services, service],
}));
return service;
},
removeService: (id: TTSServiceId) =>
set(state => {
const newServices = state.services.filter(s => s.id !== id);
return {
services: newServices,
// Clear active service if it was removed
activeServiceId: state.activeServiceId === id ? null : state.activeServiceId,
};
}),
updateServiceSettings: <TServiceSettings>(id: TTSServiceId, partialSettings: Partial<TServiceSettings>) =>
set(state => ({
services: state.services.map(service =>
service.id === id
? { ...service, setup: { ...service.setup, ...partialSettings } }
: service,
),
})),
setActiveServiceId: (id: TTSServiceId | null) =>
set({ activeServiceId: id }),
setActiveVoiceId: (voiceId: string | null) =>
set({ activeVoiceId: voiceId }),
}),
{
name: 'app-tts',
}),
));
// Convenience accessors for non-React (imperative) callers

/** Snapshot of the current TTS store state. */
export function getTTSStoreState(): TTSStoreState {
  return useTTSStore.getState();
}

/** Look up a configured service by id; null when unknown. */
export function getTTSService(serviceId: TTSServiceId): DTTSService | null {
  return useTTSStore.getState().services.find(service => service.id === serviceId) ?? null;
}

/** The globally selected service, or null when none is active. */
export function getActiveTTSService(): DTTSService | null {
  const { activeServiceId } = useTTSStore.getState();
  return activeServiceId ? getTTSService(activeServiceId) : null;
}
+195
View File
@@ -0,0 +1,195 @@
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { convert_Base64_To_UInt8Array } from '~/common/util/blobUtils';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
import { SystemPurposes, type SystemPurposeId } from '~/data';
import { findTTSVendor } from './vendors.registry';
import { getActiveTTSService, getTTSService, useTTSStore } from './store-tts.ts';
import type { TTSGenerationOptions, TTSSpeakResult, TTSServiceId } from './tts.types';
/**
 * Resolves a persona's preferred TTS voice, if any.
 * The new `voices.tts` field takes precedence; the legacy
 * `voices.elevenLabs` field is honored for backward compatibility.
 */
function getPersonaTTSConfig(personaId?: SystemPurposeId): { serviceId?: TTSServiceId; voiceId?: string } | null {
  const voices = personaId ? SystemPurposes[personaId]?.voices : undefined;
  if (!voices)
    return null;
  // new format first, then the legacy ElevenLabs entry
  const voiceId = voices.tts?.voiceId || voices.elevenLabs?.voiceId;
  return voiceId ? { voiceId } : null;
}
/**
 * Main TTS invocation function - vendor-agnostic.
 * Speaks text using the configured TTS service.
 *
 * Resolution order for service/voice: explicit options → persona
 * preference → global store defaults.
 * Never throws: all failures resolve to { success: false }.
 */
export async function speakText(
  text: string,
  options?: {
    serviceId?: TTSServiceId; // Override global service
    voiceId?: string; // Override global voice
    personaId?: SystemPurposeId; // Use persona's voice preference
    streaming?: boolean;
    turbo?: boolean;
    speed?: number;
  },
): Promise<TTSSpeakResult> {

  // Early validation: nothing to speak
  if (!text?.trim()) {
    return { success: false };
  }

  // 1. Resolve service
  // NOTE(review): `services` is destructured but unused here — lookups go through getTTSService below
  const { services, activeServiceId, activeVoiceId } = useTTSStore.getState();

  let serviceId = options?.serviceId;
  let voiceId = options?.voiceId;

  // Persona configuration fills in whatever the explicit options left unset
  if (options?.personaId) {
    const personaConfig = getPersonaTTSConfig(options.personaId);
    if (personaConfig) {
      serviceId = personaConfig.serviceId || serviceId;
      voiceId = personaConfig.voiceId || voiceId;
    }
  }

  // Fall back to global defaults
  serviceId = serviceId || activeServiceId || undefined;
  voiceId = voiceId || activeVoiceId || undefined;

  if (!serviceId) {
    console.warn('TTS: No service configured');
    return { success: false };
  }

  const service = getTTSService(serviceId);
  if (!service) {
    console.warn('TTS: Service not found:', serviceId);
    return { success: false };
  }

  // 2. Get vendor implementation
  const vendor = findTTSVendor(service.vId);
  if (!vendor) {
    console.warn('TTS: Vendor not found:', service.vId);
    return { success: false };
  }

  // 3. Get transport access
  const access = vendor.getTransportAccess(service.setup);

  // 4. Prepare generation options
  // non-English UI language hints multilingual models (vendor-specific handling)
  const { preferredLanguage } = useUIPreferencesStore.getState();
  const nonEnglish = !(preferredLanguage?.toLowerCase()?.startsWith('en'));

  const generationOptions: TTSGenerationOptions = {
    text,
    voiceId,
    streaming: options?.streaming ?? false,
    turbo: options?.turbo ?? false,
    speed: options?.speed,
    nonEnglish,
  };

  // 5. Execute TTS: consume the vendor's piece stream, playing audio as it arrives
  try {
    const stream = await vendor.rpcSpeak(access, generationOptions);

    let liveAudioPlayer: AudioLivePlayer | undefined;
    let playbackStarted = false;
    let audioBase64: string | undefined;

    for await (const piece of stream) {

      // Streaming audio chunk: lazily create the live player, then enqueue
      if (piece.audioChunk) {
        try {
          if (!liveAudioPlayer) {
            liveAudioPlayer = new AudioLivePlayer();
          }
          const chunkArray = convert_Base64_To_UInt8Array(piece.audioChunk.base64, 'tts.client (chunk)');
          // NOTE(review): passes the backing .buffer — assumes the converter returns a zero-offset view; verify
          liveAudioPlayer.enqueueChunk(chunkArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio chunk error:', audioError);
          return { success: false };
        }
      }
      // Full audio buffer (non-streaming): play once, keep base64 for the caller
      else if (piece.audio) {
        try {
          if (!options?.streaming) {
            audioBase64 = piece.audio.base64;
          }
          const audioArray = convert_Base64_To_UInt8Array(piece.audio.base64, 'tts.client');
          void AudioPlayer.playBuffer(audioArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('TTS audio buffer error:', audioError);
          return { success: false };
        }
      }
      // Errors terminate; warnings are logged and processing continues
      else if (piece.errorMessage) {
        console.error('TTS error:', piece.errorMessage);
        return { success: false, error: piece.errorMessage };
      } else if (piece.warningMessage) {
        console.warn('TTS warning:', piece.warningMessage);
      } else if (piece.control === 'start' || piece.control === 'end') {
        // Control messages - continue processing
      }
    }

    return { success: playbackStarted, audioBase64 };
  } catch (error) {
    console.error('TTS playback error:', error);
    return { success: false, error: String(error) };
  }
}
/**
 * Check if TTS is available and configured.
 * True when the active client-side service passes vendor validation,
 * or when the backend exposes a server-side ElevenLabs key.
 */
export function isTTSAvailable(): boolean {
  const { services, activeServiceId } = useTTSStore.getState();

  // active client-configured service wins, unless its vendor explicitly rejects the setup
  const activeService = activeServiceId ? services.find(s => s.id === activeServiceId) : undefined;
  if (activeService) {
    const vendor = findTTSVendor(activeService.vId);
    // only an explicit `false` from validateSetup rejects; missing vendor/validator counts as valid
    if (vendor?.validateSetup?.(activeService.setup) !== false)
      return true;
  }

  // otherwise fall back to server-side ElevenLabs capability
  return getBackendCapabilities().hasVoiceElevenLabs;
}
+87
View File
@@ -0,0 +1,87 @@
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { getElevenLabsData } from '~/modules/elevenlabs/store-module-elevenlabs';
import { useModelsStore } from '~/common/stores/llms/store-llms';
import { findTTSVendor } from './vendors.registry';
import { useTTSStore } from './store-tts';
import type { TTSVendorId } from './tts.types';
/**
 * Migrates existing TTS configurations to the new TTS store.
 * This should be called once on app initialization.
 * Idempotent: returns immediately when any service already exists.
 */
export function migrateTTSServices() {
  // NOTE(review): `activeServiceId` is destructured but never used here
  const { services, activeServiceId } = useTTSStore.getState();

  // Skip if already migrated (has existing services)
  if (services.length > 0) {
    return;
  }

  // 1. Migrate from existing ElevenLabs configuration (client key OR server-side key)
  const { elevenLabsApiKey, elevenLabsVoiceId } = getElevenLabsData();
  const { hasVoiceElevenLabs } = getBackendCapabilities();

  if (elevenLabsApiKey || hasVoiceElevenLabs) {
    const elevenLabsVendor = findTTSVendor('elevenlabs');
    if (elevenLabsVendor) {
      const service = useTTSStore.getState().createService(elevenLabsVendor);

      // Set up with existing credentials (left empty when only the server key exists)
      if (elevenLabsApiKey) {
        useTTSStore.getState().updateServiceSettings(service.id, {
          elevenKey: elevenLabsApiKey,
        });
      }

      // Set as active service
      useTTSStore.getState().setActiveServiceId(service.id);

      // Set default voice if available
      if (elevenLabsVoiceId) {
        useTTSStore.getState().setActiveVoiceId(elevenLabsVoiceId);
      }

      console.log('TTS: Migrated ElevenLabs configuration to new TTS store');
    }
  }

  // 2. Auto-import from OpenAI LLM configuration
  autoImportTTSFromLLMs();
}
/**
 * Auto-imports TTS services from configured LLM services.
 * Creates an OpenAI TTS service when OpenAI LLM credentials exist and no
 * TTS service is already configured with that same key.
 */
export function autoImportTTSFromLLMs() {
  const { sources } = useModelsStore.getState();
  const { services } = useTTSStore.getState();

  // locate an OpenAI LLM source that carries an API key
  const openaiLLMService = sources.find(s => s.vId === 'openai');
  if (!openaiLLMService?.setup?.oaiKey)
    return;
  const llmSetup = openaiLLMService.setup;

  // bail if an OpenAI TTS service already uses this exact key
  if (services.some(s => s.vId === 'openai' && s.setup.oaiKey === llmSetup.oaiKey))
    return;

  const openaiTTSVendor = findTTSVendor('openai');
  if (!openaiTTSVendor)
    return;

  // create the service and copy the LLM credentials over
  const service = useTTSStore.getState().createService(openaiTTSVendor);
  useTTSStore.getState().updateServiceSettings(service.id, {
    oaiKey: llmSetup.oaiKey,
    oaiHost: llmSetup.oaiHost,
    oaiOrgId: llmSetup.oaiOrgId,
  });
  console.log('TTS: Auto-imported OpenAI TTS service from LLM configuration');
}
+65
View File
@@ -0,0 +1,65 @@
//
// TTS Core Types
//

// Unique id of a configured service instance (e.g. 'openai-1730000000000')
export type TTSServiceId = string;

// Known vendor implementations (must match vendors.registry keys)
export type TTSVendorId = 'elevenlabs' | 'openai';

/**
 * Audio formats supported by TTS services
 */
export type TTSAudioFormat = 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';

/**
 * Voice representation (unified across all vendors)
 */
export interface TTSVoice {
  id: string;
  name: string;
  description?: string;
  previewUrl?: string; // URL of an audio sample, when the vendor provides one
  language?: string;
  category?: string;
}

/**
 * Options for TTS generation (superset of all vendor capabilities)
 */
export interface TTSGenerationOptions {
  // Core parameters (all vendors)
  text: string;
  voiceId?: string;
  // Common optional parameters
  speed?: number; // 0.25-4.0 (OpenAI TTS)
  format?: TTSAudioFormat; // Output audio format
  streaming?: boolean; // Enable streaming
  // Advanced parameters (vendor-specific, optional)
  turbo?: boolean; // ElevenLabs: use turbo model
  nonEnglish?: boolean; // ElevenLabs: use multilingual model
}

/**
 * Result of TTS generation
 */
export interface TTSSpeakResult {
  success: boolean;
  audioBase64?: string; // Available when not streaming
  error?: string;
}

/**
 * TTS Service - configured instance of a TTS vendor
 */
export interface DTTSService<TServiceSettings extends object = {}> {
  id: TTSServiceId;
  label: string;
  // service -> vendor of that service
  vId: TTSVendorId;
  // service-specific settings (partial: filled in incrementally by the UI)
  setup: Partial<TServiceSettings>;
}
+25
View File
@@ -0,0 +1,25 @@
import { TTSVendorElevenLabs } from './vendors/elevenlabs/elevenlabs.vendor';
import { TTSVendorOpenAI } from './vendors/openai/openai-tts.vendor';
import type { ITTSVendor } from './ITTSVendor';
import type { TTSVendorId } from './tts.types';
/** Global: TTS Vendor Instances Registry **/

// Keyed by TTSVendorId; the previous `as Record<string, ITTSVendor>` assertion
// silenced the exhaustive-key check the annotation provides, so it was removed.
const TTS_VENDOR_REGISTRY: Record<TTSVendorId, ITTSVendor> = {
  elevenlabs: TTSVendorElevenLabs,
  openai: TTSVendorOpenAI,
};
/** All registered vendors, ordered by ascending displayRank. */
export function findAllTTSVendors(): ITTSVendor[] {
  return Object.values(TTS_VENDOR_REGISTRY)
    .sort((a, b) => a.displayRank - b.displayRank);
}
/** Resolve a vendor by id; null when the id is absent or unknown. */
export function findTTSVendor<TServiceSettings extends object = {}, TAccess = unknown>(
  vendorId?: TTSVendorId,
): ITTSVendor<TServiceSettings, TAccess> | null {
  if (!vendorId)
    return null;
  const vendor = TTS_VENDOR_REGISTRY[vendorId] as ITTSVendor<TServiceSettings, TAccess>;
  return vendor ?? null;
}
+82
View File
@@ -0,0 +1,82 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { apiStream } from '~/common/util/trpc.client';
import type { ITTSVendor } from '../../ITTSVendor';
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
// ElevenLabs Service Settings (persisted in the service's `setup`)
export interface ElevenLabsServiceSettings {
  elevenKey?: string; // user API key; empty = rely on server-side key
  elevenHost?: string; // custom API host; empty = default
}

// ElevenLabs Access (for RPC calls) — currently mirrors the settings shape
export interface ElevenLabsAccess {
  elevenKey?: string;
  elevenHost?: string;
}
/** ElevenLabs TTS vendor singleton — see ITTSVendor for the contract. */
export const TTSVendorElevenLabs: ITTSVendor<ElevenLabsServiceSettings, ElevenLabsAccess> = {
  id: 'elevenlabs',
  name: 'ElevenLabs',
  displayRank: 10,
  location: 'cloud',
  brandColor: undefined,
  // backend capability flag that signals a server-side ElevenLabs key
  hasServerConfigKey: 'hasVoiceElevenLabs',
  capabilities: {
    streaming: true,
    voiceCloning: true,
    speedControl: false,
    listVoices: true,
  },

  // Fresh settings for a new service; empty values defer to server defaults
  initializeSetup(): ElevenLabsServiceSettings {
    return {
      elevenKey: '',
      elevenHost: '',
    };
  },

  // Valid when no key is set (server key may exist) or the key is long enough
  // NOTE(review): 32 looks like a heuristic minimum key length — confirm against ElevenLabs key format
  validateSetup(setup: ElevenLabsServiceSettings): boolean {
    return !setup.elevenKey || setup.elevenKey.trim().length >= 32;
  },

  getTransportAccess(setup?: Partial<ElevenLabsServiceSettings>): ElevenLabsAccess {
    return {
      elevenKey: setup?.elevenKey,
      elevenHost: setup?.elevenHost,
    };
  },

  // Streams speech via the existing elevenlabs tRPC router; resolves to the piece iterable
  async rpcSpeak(access: ElevenLabsAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    return apiStream.elevenlabs.speech.mutate({
      xiKey: access.elevenKey,
      voiceId: options.voiceId,
      text: options.text,
      nonEnglish: options.nonEnglish ?? false,
      audioStreaming: options.streaming ?? false,
      audioTurbo: options.turbo ?? false,
    });
  },

  // NOTE(review): the `as any` cast suggests listVoices is not on the streaming
  // client's typed surface — consider calling the typed query client instead of casting
  async rpcListVoices(access: ElevenLabsAccess): Promise<{ voices: TTSVoice[] }> {
    const result = await (apiStream as any).elevenlabs.listVoices.query({
      elevenKey: access.elevenKey,
    });
    return {
      voices: result.voices.map((v: any) => ({
        id: v.id,
        name: v.name,
        description: v.description || undefined,
        previewUrl: v.previewUrl || undefined,
        category: v.category,
      })),
    };
  },
};
+86
View File
@@ -0,0 +1,86 @@
import type { BackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { apiStream } from '~/common/util/trpc.client';
import type { ITTSVendor } from '../../ITTSVendor';
import type { TTSGenerationOptions, TTSVoice } from '../../tts.types';
// OpenAI TTS Service Settings (persisted in the service's `setup`)
export interface OpenAITTSServiceSettings {
  oaiKey?: string; // user API key; empty = rely on server-side key
  oaiHost?: string; // custom API host; empty = default api.openai.com
  oaiOrgId?: string; // optional OpenAI organization id
}

// OpenAI TTS Access (for RPC calls) — currently mirrors the settings shape
export interface OpenAITTSAccess {
  oaiKey?: string;
  oaiHost?: string;
  oaiOrgId?: string;
}

// OpenAI TTS voices (fixed list — the API exposes no voice-listing endpoint)
export const OPENAI_TTS_VOICES: TTSVoice[] = [
  { id: 'alloy', name: 'Alloy', description: 'Neutral and balanced' },
  { id: 'echo', name: 'Echo', description: 'Clear and articulate' },
  { id: 'fable', name: 'Fable', description: 'Expressive and warm' },
  { id: 'onyx', name: 'Onyx', description: 'Deep and authoritative' },
  { id: 'nova', name: 'Nova', description: 'Friendly and conversational' },
  { id: 'shimmer', name: 'Shimmer', description: 'Soft and gentle' },
];
/** OpenAI TTS vendor singleton — see ITTSVendor for the contract. */
export const TTSVendorOpenAI: ITTSVendor<OpenAITTSServiceSettings, OpenAITTSAccess> = {
  id: 'openai',
  name: 'OpenAI TTS',
  displayRank: 20,
  location: 'cloud',
  brandColor: '#10a37f',
  // piggybacks on the LLM capability flag: a server-side OpenAI key serves both
  hasServerConfigKey: 'hasLlmOpenAI',
  capabilities: {
    streaming: true,
    voiceCloning: false,
    speedControl: true,
    listVoices: true,
  },

  // Fresh settings for a new service; empty values defer to server defaults
  initializeSetup(): OpenAITTSServiceSettings {
    return {
      oaiKey: '',
      oaiHost: '',
      oaiOrgId: '',
    };
  },

  // Valid when no key is set (server key may exist) or the key has the 'sk-' prefix
  // NOTE(review): proxy/alternative-host keys may not start with 'sk-' — confirm this is intended
  validateSetup(setup: OpenAITTSServiceSettings): boolean {
    return !setup.oaiKey || setup.oaiKey.trim().startsWith('sk-');
  },

  getTransportAccess(setup?: Partial<OpenAITTSServiceSettings>): OpenAITTSAccess {
    return {
      oaiKey: setup?.oaiKey,
      oaiHost: setup?.oaiHost,
      oaiOrgId: setup?.oaiOrgId,
    };
  },

  // Streams speech via the tts.openai tRPC router
  // NOTE(review): model is hard-coded to 'tts-1' even though the router accepts
  // 'tts-1-hd'; `options.turbo` is ignored (no OpenAI equivalent)
  async rpcSpeak(access: OpenAITTSAccess, options: TTSGenerationOptions): Promise<AsyncIterable<any>> {
    return apiStream.tts.openai.speech.mutate({
      access,
      text: options.text,
      voice: options.voiceId || 'alloy',
      model: 'tts-1',
      speed: options.speed,
      format: options.format,
      streaming: options.streaming ?? false,
    });
  },

  async rpcListVoices(access: OpenAITTSAccess): Promise<{ voices: TTSVoice[] }> {
    // OpenAI has a fixed set of voices
    return { voices: OPENAI_TTS_VOICES };
  },
};
+4
View File
@@ -8,6 +8,7 @@ import { llmAnthropicRouter } from '~/modules/llms/server/anthropic/anthropic.ro
import { llmGeminiRouter } from '~/modules/llms/server/gemini/gemini.router';
import { llmOllamaRouter } from '~/modules/llms/server/ollama/ollama.router';
import { llmOpenAIRouter } from '~/modules/llms/server/openai/openai.router';
import { openaiTTSRouter } from '~/modules/tts/server/openai-tts.router';
import { youtubeRouter } from '~/modules/youtube/youtube.router';
/**
@@ -22,6 +23,9 @@ export const appRouterEdge = createTRPCRouter({
llmGemini: llmGeminiRouter,
llmOllama: llmOllamaRouter,
llmOpenAI: llmOpenAIRouter,
tts: createTRPCRouter({
openai: openaiTTSRouter,
}),
youtube: youtubeRouter,
});