mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 14:10:15 -07:00
Speex: LocalAI vendor
This commit is contained in:
@@ -230,23 +230,24 @@ function LocalAIConfig({ engine, onUpdate, mode }: {
|
||||
const voice = engine.voice as DVoiceLocalAI;
|
||||
|
||||
return <>
|
||||
<FormControl>
|
||||
<FormLabelStart title='Voice ID' description='LocalAI voice identifier' />
|
||||
<Input
|
||||
value={voice.voiceId ?? ''}
|
||||
onChange={(e) => onUpdate({ voice: { ...voice, voiceId: e.target.value } })}
|
||||
placeholder='e.g., en-us-amy-low'
|
||||
/>
|
||||
<FormHelperText>Depends on your LocalAI TTS configuration</FormHelperText>
|
||||
</FormControl>
|
||||
|
||||
<FormControl>
|
||||
<FormLabelStart title='Model' description='TTS model name' />
|
||||
<Input
|
||||
value={voice.ttsModel ?? ''}
|
||||
onChange={(e) => onUpdate({ voice: { ...voice, ttsModel: e.target.value } })}
|
||||
placeholder='e.g., piper'
|
||||
placeholder='e.g., kokoro'
|
||||
/>
|
||||
<FormHelperText>Model to use for speech synthesis</FormHelperText>
|
||||
</FormControl>
|
||||
|
||||
<FormControl>
|
||||
<FormLabelStart title='Backend' description='TTS backend (optional)' />
|
||||
<Input
|
||||
value={voice.ttsBackend ?? ''}
|
||||
onChange={(e) => onUpdate({ voice: { ...voice, ttsBackend: e.target.value || undefined } })}
|
||||
placeholder='e.g., coqui, bark, piper'
|
||||
/>
|
||||
<FormHelperText>Leave empty for default backend</FormHelperText>
|
||||
</FormControl>
|
||||
</>;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
import { createTRPCRouter, edgeProcedure } from '~/server/trpc/trpc.server';
|
||||
|
||||
import { SpeexSpeechParticle, SpeexWire, SpeexWire_ListVoices_Output } from './speex.wiretypes';
|
||||
|
||||
import { SpeexSpeechParticle, SpeexWire, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './speex.wiretypes';
|
||||
import { listVoicesElevenLabs, synthesizeElevenLabs } from './synthesize-elevenlabs';
|
||||
import { synthesizeOpenAIProtocol } from './synthesize-openai';
|
||||
|
||||
|
||||
/**
 * Arguments handed to a speech-synthesis backend.
 *
 * The type parameter pins the `access` variant the backend accepts, so each
 * backend only has to deal with its own credentials shape.
 */
interface SynthesizeBackendFnParams<TSpeexAccess extends SpeexWire_Access> {
  /** Access/credentials object for the selected backend. */
  access: TSpeexAccess;
  /** Text to be synthesized. */
  text: string;
  /** Voice settings as received over the wire. */
  voice: SpeexWire_Voice;
  /** Whether the backend should stream audio chunks instead of returning the audio at once. */
  streaming: boolean;
  /** Optional abort signal forwarded by the caller. */
  signal?: AbortSignal;
}
|
||||
|
||||
/**
 * Common signature of every synthesis backend: takes the backend-specific
 * parameters and produces speech particles through an async generator.
 */
export type SynthesizeBackendFn<TSpeexAccess extends SpeexWire_Access> =
  (params: SynthesizeBackendFnParams<TSpeexAccess>) => AsyncGenerator<SpeexSpeechParticle>;
|
||||
|
||||
|
||||
export const speexRouter = createTRPCRouter({
|
||||
|
||||
/**
|
||||
@@ -16,11 +26,8 @@ export const speexRouter = createTRPCRouter({
|
||||
.input(SpeexWire.Synthesize_input_schema)
|
||||
.mutation(async function* ({ input, ctx }): AsyncGenerator<SpeexSpeechParticle> {
|
||||
const { access, text, voice, streaming } = input;
|
||||
|
||||
try {
|
||||
yield { t: 'start' };
|
||||
|
||||
// Route based on access.dialect discriminant
|
||||
switch (access.dialect) {
|
||||
case 'elevenlabs':
|
||||
yield* synthesizeElevenLabs({ access, text, voice, streaming, signal: ctx.reqSignal });
|
||||
@@ -32,9 +39,8 @@ export const speexRouter = createTRPCRouter({
|
||||
break;
|
||||
|
||||
default:
|
||||
yield { t: 'error', e: 'Unknown dialect' };
|
||||
const _exhaustiveCheck: never = access;
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
yield { t: 'error', e: error instanceof Error ? error.message : 'Synthesis failed' };
|
||||
}
|
||||
|
||||
@@ -59,8 +59,9 @@ export namespace SpeexWire {
|
||||
|
||||
export const LocalAI_schema = z.object({
|
||||
dialect: z.literal('localai'),
|
||||
voiceId: z.string().optional(),
|
||||
model: z.string().optional(),
|
||||
backend: z.string().optional(), // ttsBackend (e.g., 'coqui', 'bark', 'piper', 'vall-e-x')
|
||||
model: z.string().optional(), // ttsModel (e.g., 'kokoro', 'tts_models/en/ljspeech/glow-tts')
|
||||
language: z.string().optional(), // for multilingual models like xtts_v2
|
||||
});
|
||||
|
||||
export const OpenAI_schema = z.object({
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { env } from '~/server/env.server';
|
||||
import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
import type { SpeexSpeechParticle, SpeexWire_Access_ElevenLabs, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './speex.wiretypes';
|
||||
import type { SpeexWire_Access_ElevenLabs, SpeexWire_ListVoices_Output } from './speex.wiretypes';
|
||||
import type { SynthesizeBackendFn } from './speex.router';
|
||||
|
||||
|
||||
// configuration
|
||||
@@ -10,16 +11,7 @@ const MIN_CHUNK_SIZE = 4096;
|
||||
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Rachel
|
||||
|
||||
|
||||
interface SynthesizeElevenLabsParams {
|
||||
access: SpeexWire_Access_ElevenLabs;
|
||||
text: string;
|
||||
voice: SpeexWire_Voice;
|
||||
streaming: boolean;
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
|
||||
export async function* synthesizeElevenLabs(params: SynthesizeElevenLabsParams): AsyncGenerator<SpeexSpeechParticle> {
|
||||
export const synthesizeElevenLabs: SynthesizeBackendFn<SpeexWire_Access_ElevenLabs> = async function* (params) {
|
||||
const { access, text: inputText, voice, streaming, signal } = params;
|
||||
|
||||
// Safety check: trim text that's too long
|
||||
@@ -27,8 +19,8 @@ export async function* synthesizeElevenLabs(params: SynthesizeElevenLabsParams):
|
||||
if (text.length > SAFETY_TEXT_LENGTH)
|
||||
text = text.slice(0, SAFETY_TEXT_LENGTH);
|
||||
|
||||
// Build request
|
||||
const voiceId = voice.voiceId || DEFAULT_VOICE_ID;
|
||||
// Build request - narrow to elevenlabs dialect for type safety
|
||||
const voiceId = (voice.dialect === 'elevenlabs' ? voice.voiceId : undefined) || DEFAULT_VOICE_ID;
|
||||
const model = voice.model || 'eleven_turbo_v2_5';
|
||||
const path = `/v1/text-to-speech/${voiceId}${streaming ? '/stream' : ''}`;
|
||||
const { headers, url } = _elevenlabsAccess(access, path);
|
||||
@@ -106,7 +98,7 @@ export async function* synthesizeElevenLabs(params: SynthesizeElevenLabsParams):
|
||||
} catch (error: any) {
|
||||
yield { t: 'error', e: `ElevenLabs stream error: ${error.message || 'Unknown error'}` };
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
export async function listVoicesElevenLabs(access: SpeexWire_Access_ElevenLabs): Promise<SpeexWire_ListVoices_Output> {
|
||||
|
||||
@@ -5,78 +5,92 @@
|
||||
* Endpoint: POST /v1/audio/speech
|
||||
*/
|
||||
|
||||
import { fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
|
||||
|
||||
import type { SpeexSpeechParticle, SpeexWire_Access_OpenAI, SpeexWire_Voice } from './speex.wiretypes';
|
||||
import type { SynthesizeBackendFn } from './speex.router';
|
||||
import type { SpeexWire_Access_OpenAI, SpeexWire_ListVoices_Output } from './speex.wiretypes';
|
||||
|
||||
|
||||
// configuration
|
||||
const SAFETY_TEXT_LENGTH = 4096; // OpenAI max
|
||||
const MIN_CHUNK_SIZE = 4096;
|
||||
const DEFAULT_VOICE_ID = 'alloy';
|
||||
const DEFAULT_MODEL = 'tts-1';
|
||||
const MIN_CHUNK_SIZE = 4096; // bytes
|
||||
const FALLBACK_OPENAI_MODEL = 'tts-1';
|
||||
const FALLBACK_OPENAI_VOICE_ID = 'alloy';
|
||||
|
||||
|
||||
interface SynthesizeOpenAIParams {
|
||||
access: SpeexWire_Access_OpenAI;
|
||||
text: string;
|
||||
voice: SpeexWire_Voice;
|
||||
streaming: boolean;
|
||||
signal?: AbortSignal;
|
||||
/** OpenAI TTS API: POST /v1/audio/speech */
|
||||
interface OpenAIWire_TTSRequest {
|
||||
input: string;
|
||||
model: string; // required: 'tts-1', 'tts-1-hd', 'gpt-4o-mini-tts'
|
||||
voice: string; // required: 'alloy', 'echo', 'fable', etc.
|
||||
response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
|
||||
speed?: number; // 0.25-4.0
|
||||
instructions?: string; // voice instructions
|
||||
}
|
||||
|
||||
/**
 * Request body for the LocalAI TTS API: POST /v1/audio/speech (OpenAI-similar).
 * Only `input` is required; all other fields are optional.
 */
interface LocalAIWire_TTSRequest {
  /** Text to synthesize. */
  input: string;
  /** Optional model name, e.g. 'kokoro'. */
  model?: string;
  /** Optional backend: 'coqui', 'bark', 'piper', 'transformers-musicgen', 'vall-e-x'. */
  backend?: string;
  /** Optional language, for multilingual models. */
  language?: string;
  /**
   * Optional audio format. Author's note: defaults to 'wav'; 'mp3' also seems
   * to work well, with kokoro at least.
   */
  response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
}
|
||||
|
||||
|
||||
/**
|
||||
* Synthesize speech using OpenAI-compatible TTS API.
|
||||
* Works with both OpenAI and LocalAI dialects.
|
||||
* Synthesize speech using OpenAI-compatible/similar TTS API.
|
||||
*/
|
||||
export async function* synthesizeOpenAIProtocol(params: SynthesizeOpenAIParams): AsyncGenerator<SpeexSpeechParticle> {
|
||||
export const synthesizeOpenAIProtocol: SynthesizeBackendFn<SpeexWire_Access_OpenAI> = async function* (params) {
|
||||
|
||||
const { access, text: inputText, voice, streaming, signal } = params;
|
||||
|
||||
// Safety check: trim text that's too long
|
||||
// safety check: trim text that's too long
|
||||
let text = inputText;
|
||||
if (text.length > SAFETY_TEXT_LENGTH)
|
||||
text = text.slice(0, SAFETY_TEXT_LENGTH);
|
||||
|
||||
// Resolve host and API key based on dialect
|
||||
|
||||
// request.headers
|
||||
const { host, apiKey } = _resolveAccess(access);
|
||||
|
||||
// Build request
|
||||
const voiceId = voice.voiceId || DEFAULT_VOICE_ID;
|
||||
const model = voice.model || DEFAULT_MODEL;
|
||||
const url = `${host}/v1/audio/speech`;
|
||||
|
||||
const body: OpenAIWire_TTSRequest = {
|
||||
input: text,
|
||||
model,
|
||||
voice: voiceId,
|
||||
// Use wav for streaming (lower latency, no decoding overhead)
|
||||
// Use mp3 for non-streaming (smaller size)
|
||||
response_format: streaming ? 'wav' : 'mp3',
|
||||
};
|
||||
|
||||
// Add optional parameters if present
|
||||
if (voice.dialect === 'openai') {
|
||||
if (voice.speed !== undefined) body.speed = voice.speed;
|
||||
if (voice.instruction) body.instructions = voice.instruction;
|
||||
}
|
||||
|
||||
// Build headers
|
||||
const headers: HeadersInit = {
|
||||
'Content-Type': 'application/json',
|
||||
...(!apiKey ? {} : { 'Authorization': `Bearer ${apiKey}` }),
|
||||
...(!access.orgId ? {} : { 'OpenAI-Organization': access.orgId }),
|
||||
};
|
||||
if (apiKey) {
|
||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||
}
|
||||
if (access.orgId) {
|
||||
headers['OpenAI-Organization'] = access.orgId;
|
||||
|
||||
// request.body
|
||||
let body: OpenAIWire_TTSRequest | LocalAIWire_TTSRequest;
|
||||
switch (access.dialect) {
|
||||
case 'localai':
|
||||
if (voice.dialect !== 'localai') throw new Error('Voice dialect mismatch for LocalAI access');
|
||||
body = {
|
||||
input: text,
|
||||
...(voice.backend ? { backend: voice.backend } : {}),
|
||||
...(voice.model ? { model: voice.model } : {}),
|
||||
...(voice.language ? { language: voice.language } : {}),
|
||||
response_format: streaming ? 'wav' : 'mp3',
|
||||
} satisfies LocalAIWire_TTSRequest;
|
||||
break;
|
||||
|
||||
case 'openai':
|
||||
if (voice.dialect !== 'openai') throw new Error('Voice dialect mismatch for OpenAI access');
|
||||
body = {
|
||||
input: text,
|
||||
model: voice.model || FALLBACK_OPENAI_MODEL,
|
||||
voice: ('voiceId' in voice ? voice.voiceId : undefined) || FALLBACK_OPENAI_VOICE_ID,
|
||||
...(voice.speed !== undefined ? { speed: voice.speed } : {}),
|
||||
...(voice.instruction ? { instructions: voice.instruction } : {}),
|
||||
response_format: streaming ? 'wav' : 'mp3',
|
||||
} satisfies OpenAIWire_TTSRequest;
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch
|
||||
// connect
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetchResponseOrTRPCThrow({
|
||||
url,
|
||||
url: `${host}/v1/audio/speech`,
|
||||
method: 'POST',
|
||||
headers,
|
||||
body,
|
||||
@@ -89,7 +103,7 @@ export async function* synthesizeOpenAIProtocol(params: SynthesizeOpenAIParams):
|
||||
return;
|
||||
}
|
||||
|
||||
// Non-streaming: return entire audio at once
|
||||
// non-streaming: return entire audio at once
|
||||
if (!streaming) {
|
||||
try {
|
||||
const audioArrayBuffer = await response.arrayBuffer();
|
||||
@@ -102,12 +116,10 @@ export async function* synthesizeOpenAIProtocol(params: SynthesizeOpenAIParams):
|
||||
return;
|
||||
}
|
||||
|
||||
// Streaming: read chunks
|
||||
// streaming: read chunks
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) {
|
||||
yield { t: 'error', e: 'No stream reader available' };
|
||||
return;
|
||||
}
|
||||
if (!reader)
|
||||
return yield { t: 'error', e: 'No stream reader available' };
|
||||
|
||||
try {
|
||||
const accumulatedChunks: Uint8Array[] = [];
|
||||
@@ -141,51 +153,83 @@ export async function* synthesizeOpenAIProtocol(params: SynthesizeOpenAIParams):
|
||||
} catch (error: any) {
|
||||
yield { t: 'error', e: `Stream error: ${error.message || 'Unknown error'}` };
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// List Voices - LocalAI
|
||||
//
|
||||
|
||||
/**
 * Catalog of model ids recognized as TTS models, keyed by the id reported by
 * the server, with a display name and a short description for each.
 */
const KNOWN_TTS_MODELS: Record<string, { name: string; description: string }> = {
  kokoro: {
    name: 'Kokoro',
    description: 'High-quality neural TTS',
  },
  bark: {
    name: 'Bark',
    description: 'Text-to-audio by Suno AI',
  },
  piper: {
    name: 'Piper',
    description: 'Fast local TTS',
  },
  coqui: {
    name: 'Coqui',
    description: 'Coqui TTS engine',
  },
  'vall-e-x': {
    name: 'VALL-E X',
    description: 'Zero-shot voice cloning',
  },
  'tts-1': {
    name: 'TTS-1',
    description: 'OpenAI-compatible TTS',
  },
  'tts-1-hd': {
    name: 'TTS-1 HD',
    description: 'High-definition TTS',
  },
};
|
||||
|
||||
/**
 * Shape of the LocalAI `GET /v1/models` response, limited to the fields
 * this module reads.
 */
interface LocalAIWire_ModelsResponse {
  object: 'list';
  /** One entry per model reported by the server. */
  data: Array<{
    id: string;
    object: 'model';
  }>;
}
|
||||
|
||||
/**
 * List available TTS models from a LocalAI instance.
 *
 * Queries `GET {host}/v1/models` and keeps only the models whose id is an own
 * key of `KNOWN_TTS_MODELS`, decorating each with its known name/description.
 *
 * Best-effort: if the request fails, or the payload does not carry a `data`
 * array, a warning is logged (on fetch failure) and an empty list is returned.
 *
 * @param access Access object; must have `dialect === 'localai'`.
 * @returns The recognized TTS models, as `{ voices: [...] }`.
 * @throws Error if `access.dialect` is not `'localai'`. Errors raised while
 *         resolving the access (before the request) also propagate.
 */
export async function listVoicesLocalAI(access: SpeexWire_Access_OpenAI): Promise<SpeexWire_ListVoices_Output> {
  if (access.dialect !== 'localai')
    throw new Error('listVoicesLocalAI requires localai dialect');

  const { host, apiKey } = _resolveAccess(access);
  const headers: HeadersInit = {
    'Content-Type': 'application/json',
    ...(!apiKey ? {} : { 'Authorization': `Bearer ${apiKey}` }),
  };

  let modelsResponse: LocalAIWire_ModelsResponse;
  try {
    modelsResponse = await fetchJsonOrTRPCThrow<LocalAIWire_ModelsResponse>({
      url: `${host}/v1/models`,
      headers,
      name: 'LocalAI',
    });
  } catch (error: unknown) {
    // narrow before reading `.message`: anything can be thrown, not only Error instances
    console.warn('[listVoicesLocalAI] Failed to fetch models:', error instanceof Error ? error.message : error);
    return { voices: [] };
  }

  // the payload is external input: tolerate a missing or malformed `data` field
  const models = Array.isArray(modelsResponse?.data) ? modelsResponse.data : [];

  return {
    // keep known TTS models only
    voices: models.flatMap(model => {
      const id = model?.id;
      // own-property check: a plain `in` would also match inherited keys such as 'toString' or 'constructor'
      if (typeof id !== 'string' || !Object.prototype.hasOwnProperty.call(KNOWN_TTS_MODELS, id))
        return [];
      const known = KNOWN_TTS_MODELS[id];
      return known ? [{ id, name: known.name, description: known.description }] : [];
    }),
  };
}
|
||||
|
||||
|
||||
// Helpers
|
||||
|
||||
function _resolveAccess(access: SpeexWire_Access_OpenAI): { host: string; apiKey: string } {
|
||||
if (access.dialect === 'openai') {
|
||||
// OpenAI: use default host if not specified, API key required
|
||||
let host = (access.apiHost || 'https://api.openai.com').trim();
|
||||
if (!host.startsWith('http'))
|
||||
host = `https://${host}`;
|
||||
if (host.endsWith('/'))
|
||||
host = host.slice(0, -1);
|
||||
|
||||
return {
|
||||
host,
|
||||
apiKey: access.apiKey || '',
|
||||
};
|
||||
}
|
||||
|
||||
// LocalAI: host required, API key optional
|
||||
let host = (access.apiHost || '').trim();
|
||||
if (!host) throw new Error('LocalAI requires apiHost to be specified');
|
||||
function _resolveAccess(access: Readonly<SpeexWire_Access_OpenAI>): { host: string; apiKey: string } {
|
||||
|
||||
// determine host
|
||||
const isOpenAI = access.dialect === 'openai';
|
||||
let host = isOpenAI
|
||||
? (access.apiHost || 'https://api.openai.com').trim()
|
||||
: (access.apiHost || '').trim();
|
||||
if (!host) throw new Error('LocalAI requires a host URL');
|
||||
if (!host.startsWith('http')) {
|
||||
// noinspection HttpUrlsUsage
|
||||
host = `http://${host}`; // LocalAI is often local, default to http
|
||||
host = isOpenAI ? `https://${host}` : `http://${host}`; // LocalAI is often local, default to http
|
||||
}
|
||||
if (host.endsWith('/'))
|
||||
host = host.slice(0, -1);
|
||||
|
||||
return {
|
||||
host,
|
||||
apiKey: access.apiKey || '',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
// Wire types
|
||||
|
||||
interface OpenAIWire_TTSRequest {
|
||||
input: string;
|
||||
model: string;
|
||||
voice: string;
|
||||
response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
|
||||
speed?: number;
|
||||
instructions?: string;
|
||||
return { host, apiKey: access.apiKey || '' };
|
||||
}
|
||||
|
||||
@@ -10,9 +10,9 @@ import { findModelsServiceOrNull } from '~/common/stores/llms/store-llms';
|
||||
|
||||
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
|
||||
|
||||
import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
|
||||
import type { SpeexSpeakResult } from './speex.client';
|
||||
import type { DCredentialsApiKey, DCredentialsLLMSService, DSpeexCredentials, DSpeexEngineAny, SpeexRPCDialect } from './speex.types';
|
||||
import type { SpeexSpeechParticle, SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
|
||||
import type { SpeexWire_Access, SpeexWire_ListVoices_Output, SpeexWire_Voice } from './server/speex.wiretypes';
|
||||
|
||||
|
||||
// Configuration
|
||||
@@ -74,26 +74,24 @@ export async function speexSynthesizeRPC(
|
||||
const audioBuffer = _base64ToArrayBuffer(particle.base64);
|
||||
|
||||
// Playback
|
||||
if (options.playback && audioPlayer) {
|
||||
if (options.playback && audioPlayer)
|
||||
audioPlayer.enqueueChunk(audioBuffer);
|
||||
}
|
||||
|
||||
// Accumulate for return
|
||||
if (options.returnAudio) {
|
||||
if (options.returnAudio)
|
||||
audioChunks.push(audioBuffer);
|
||||
}
|
||||
|
||||
// Callback
|
||||
callbacks?.onChunk?.(audioBuffer);
|
||||
break;
|
||||
|
||||
case 'done':
|
||||
if (audioPlayer) {
|
||||
if (audioPlayer)
|
||||
audioPlayer.endPlayback();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'error':
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
throw new Error(particle.e);
|
||||
}
|
||||
}
|
||||
@@ -206,11 +204,11 @@ function _resolveFromLLMService(dialect: SpeexRPCDialect, credentials: DCredenti
|
||||
if (!service) return null;
|
||||
|
||||
// Extract credentials based on LLM vendor type
|
||||
const setup = service.setup as Record<string, any>;
|
||||
const setup = service.setup as Record<string, any> || {};
|
||||
|
||||
switch (dialect) {
|
||||
case 'elevenlabs':
|
||||
// ElevenLabs doesn't typically link to LLM services
|
||||
// ElevenLabs doesn't link to LLM services
|
||||
return null;
|
||||
|
||||
case 'openai':
|
||||
@@ -261,8 +259,9 @@ function _buildWireVoice(engine: DSpeexEngineAny): SpeexWire_Voice {
|
||||
case 'localai':
|
||||
return {
|
||||
dialect: 'localai',
|
||||
voiceId: voice.voiceId,
|
||||
backend: voice.ttsBackend,
|
||||
model: voice.ttsModel,
|
||||
language: voice.language,
|
||||
};
|
||||
|
||||
case 'webspeech':
|
||||
|
||||
@@ -48,10 +48,13 @@ export interface DVoiceElevenLabs {
|
||||
// speakerBoost?: boolean;
|
||||
}
|
||||
|
||||
// type LocalAITTSBackend = | 'coqui' | 'bark' | 'piper' | 'transformers-musicgen' | 'vall-e-x'
|
||||
export interface DVoiceLocalAI {
|
||||
vendorType: 'localai';
|
||||
ttsModel?: string;
|
||||
voiceId?: string;
|
||||
// we let the user insert strings (or nothing) for the 2 fields below
|
||||
ttsModel?: string; // Model name (e.g., 'kokoro', 'tts_models/en/ljspeech/glow-tts', 'v2/en_speaker_4' for bark)
|
||||
ttsBackend?: string; // Backend (e.g., 'coqui', 'bark', 'piper', 'transformers-musicgen', 'vall-e-x')
|
||||
language?: string; // Language code for multilingual models (e.g., 'en', 'fr' for xtts_v2)
|
||||
}
|
||||
|
||||
export interface DVoiceOpenAI {
|
||||
|
||||
+14
-2
@@ -1,6 +1,17 @@
|
||||
import type { ISpeexVendor } from './ISpeexVendor';
|
||||
|
||||
|
||||
/**
|
||||
* LocalAI TTS Vendor
|
||||
*
|
||||
* LocalAI supports multiple TTS backends: coqui, bark, piper, transformers-musicgen, vall-e-x.
|
||||
* When no backend is specified, LocalAI uses its default configuration.
|
||||
*
|
||||
* Default recommendation: Use 'kokoro' model without specifying a backend for the best
|
||||
* out-of-the-box experience with high-quality neural TTS.
|
||||
*
|
||||
* @see https://localai.io/features/text-to-audio/
|
||||
*/
|
||||
export const SpeexVendorLocalAI: ISpeexVendor<'localai'> = {
|
||||
vendorType: 'localai',
|
||||
name: 'LocalAI',
|
||||
@@ -26,7 +37,8 @@ export const SpeexVendorLocalAI: ISpeexVendor<'localai'> = {
|
||||
|
||||
getDefaultVoice: () => ({
|
||||
vendorType: 'localai',
|
||||
ttsModel: undefined, // depends on what's installed
|
||||
voiceId: undefined,
|
||||
ttsBackend: undefined,
|
||||
ttsModel: 'kokoro', // recommended default - high quality neural TTS
|
||||
language: undefined,
|
||||
}),
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user