Remove src/modules/elevenlabs

This commit is contained in:
Enrico Ros
2025-11-30 06:12:20 -08:00
parent a54bfdb342
commit ffc1bf9c58
5 changed files with 0 additions and 622 deletions
@@ -1,67 +0,0 @@
import * as React from 'react';
import { FormControl } from '@mui/joy';
import { useChatAutoAI } from '../../apps/chat/store-app-chat';
import { AlreadySet } from '~/common/components/AlreadySet';
import { FormInputKey } from '~/common/components/forms/FormInputKey';
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
import { FormRadioControl } from '~/common/components/forms/FormRadioControl';
import { useCapabilityElevenLabs } from '~/common/components/useCapabilities';
import { isElevenLabsEnabled } from './elevenlabs.client';
import { useElevenLabsVoiceDropdown, useElevenLabsVoices } from './useElevenLabsVoiceDropdown';
import { useElevenLabsApiKey } from './store-module-elevenlabs';
/**
 * Settings panel for the ElevenLabs voice module: auto-speak mode, client
 * API key entry (hidden when the server is configured), and voice selection.
 */
export function ElevenlabsSettings() {

  // module state
  const [apiKey, setApiKey] = useElevenLabsApiKey();

  // external state
  const { autoSpeak, setAutoSpeak } = useChatAutoAI();
  const { hasVoices } = useElevenLabsVoices();
  const { isConfiguredServerSide } = useCapabilityElevenLabs();
  const { voicesDropdown } = useElevenLabsVoiceDropdown(true);

  // derived state
  const keyLooksValid = isElevenLabsEnabled(apiKey);
  const speakDescription = autoSpeak === 'off' ? 'Off' : 'First paragraph';
  const speakTooltip = !hasVoices ? 'No voices available, please configure a voice synthesis service' : undefined;

  return <>

    {/*<FormHelperText>*/}
    {/*  📢 Hear AI responses, even in your own voice*/}
    {/*</FormHelperText>*/}

    <FormRadioControl
      title='Speak Responses'
      description={speakDescription}
      tooltip={speakTooltip}
      disabled={!hasVoices}
      options={[
        { value: 'off', label: 'Off' },
        { value: 'firstLine', label: 'Start' },
        { value: 'all', label: 'Full' },
      ]}
      value={autoSpeak} onChange={setAutoSpeak}
    />

    {/* only prompt for a key when the server does not provide one */}
    {!isConfiguredServerSide && <FormInputKey
      autoCompleteId='elevenlabs-key' label='ElevenLabs API Key'
      rightLabel={<AlreadySet required={!isConfiguredServerSide} />}
      value={apiKey} onChange={setApiKey}
      required={!isConfiguredServerSide} isError={!keyLooksValid}
    />}

    <FormControl orientation='horizontal' sx={{ justifyContent: 'space-between', alignItems: 'center' }}>
      <FormLabelStart title='Assistant Voice' />
      {voicesDropdown}
    </FormControl>

  </>;
}
-126
View File
@@ -1,126 +0,0 @@
import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { CapabilityElevenLabsSpeechSynthesis } from '~/common/components/useCapabilities';
import { apiStream } from '~/common/util/trpc.client';
import { convert_Base64_To_UInt8Array } from '~/common/util/blobUtils';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
import { getElevenLabsData, useElevenLabsData } from './store-module-elevenlabs';
/**
 * Heuristic validity check for a client-provided ElevenLabs API key:
 * present and at least 32 characters after trimming whitespace.
 * Fix: dropped the pointless optional chain after `trim()` — `String.prototype.trim`
 * always returns a string, so `?.length` could never short-circuit.
 * NOTE(review): 32 is a minimum-length heuristic, not an exact key-format check.
 */
export const isValidElevenLabsApiKey = (apiKey?: string): boolean => !!apiKey && apiKey.trim().length >= 32;
/**
 * Whether the ElevenLabs service can be used: a client key, when present,
 * must look valid; otherwise defer to the backend's server-side configuration.
 */
export const isElevenLabsEnabled = (apiKey?: string): boolean => {
  if (apiKey)
    return isValidElevenLabsApiKey(apiKey);
  return getBackendCapabilities().hasVoiceElevenLabs;
};
/**
 * React hook reporting the ElevenLabs speech-synthesis capability:
 * server-side config, client-side key validity, and an overall 'mayWork' flag.
 */
export function useCapability(): CapabilityElevenLabsSpeechSynthesis {
  const [storedApiKey, storedVoiceId] = useElevenLabsData();

  const isConfiguredServerSide = getBackendCapabilities().hasVoiceElevenLabs;
  const isConfiguredClientSide = !!storedApiKey && isValidElevenLabsApiKey(storedApiKey);

  // NOTE(review): a stored voiceId alone also sets mayWork, even without any key — confirm intended
  const mayWork = isConfiguredServerSide || isConfiguredClientSide || !!storedVoiceId;

  return { mayWork, isConfiguredServerSide, isConfiguredClientSide };
}
// Outcome of a single TTS invocation; 'success' is false on validation, transport, or playback errors.
interface ElevenLabsSpeakResult {
  success: boolean;
  audioBase64?: string; // Available when not streaming
}

/**
 * Speaks text using ElevenLabs TTS.
 *
 * Consumes the tRPC stream from the 'speech' mutation: in streaming mode, audio
 * chunks are enqueued into a lazily-created AudioLivePlayer; otherwise the full
 * buffer is played fire-and-forget via AudioPlayer and its base64 is returned.
 *
 * @param text text to synthesize; empty/whitespace-only input is a no-op failure
 * @param voiceId overrides the stored voice id when provided
 * @param audioStreaming stream-and-play chunks vs. wait for the whole audio buffer
 * @param audioTurbo request the turbo model upstream
 * @returns Object with success status and optionally the audio base64 (when not streaming)
 */
export async function elevenLabsSpeakText(text: string, voiceId: string | undefined, audioStreaming: boolean, audioTurbo: boolean): Promise<ElevenLabsSpeakResult> {

  // Early validation
  if (!(text?.trim())) {
    // console.log('ElevenLabs: No text to speak');
    return { success: false };
  }

  const { elevenLabsApiKey, elevenLabsVoiceId } = getElevenLabsData();
  if (!isElevenLabsEnabled(elevenLabsApiKey)) {
    // console.warn('ElevenLabs: Service not enabled or configured');
    return { success: false };
  }

  // derive the non-English hint from the UI language preference (model selection happens server-side)
  const { preferredLanguage } = useUIPreferencesStore.getState();
  const nonEnglish = !(preferredLanguage?.toLowerCase()?.startsWith('en'));

  // audio live player instance, if needed
  let liveAudioPlayer: AudioLivePlayer | undefined;
  let playbackStarted = false;
  let audioBase64: string | undefined;

  try {
    const stream = await apiStream.elevenlabs.speech.mutate({
      xiKey: elevenLabsApiKey,
      voiceId: voiceId || elevenLabsVoiceId,
      text: text,
      nonEnglish,
      audioStreaming,
      audioTurbo,
    });

    for await (const piece of stream) {

      // ElevenLabs stream buffer
      if (piece.audioChunk) {
        try {
          // create the live audio player as needed
          // NOTE: in the future we can have a centralized audio playing system
          if (!liveAudioPlayer)
            liveAudioPlayer = new AudioLivePlayer();
          // enqueue a decoded audio chunk - this will throw on malformed base64 data
          const chunkArray = convert_Base64_To_UInt8Array(piece.audioChunk.base64, 'elevenLabsSpeakText (chunk)');
          liveAudioPlayer.enqueueChunk(chunkArray.buffer);
          playbackStarted = true;
        } catch (audioError) {
          console.error('ElevenLabs audio chunk error:', audioError);
          return { success: false };
        }
      }

      // ElevenLabs full audio buffer
      else if (piece.audio) {
        try {
          // return base64 for potential reuse
          if (!audioStreaming)
            audioBase64 = piece.audio.base64;
          // also consider merging LiveAudioPlayer into AudioPlayer - note this will throw on malformed base64 data
          const audioArray = convert_Base64_To_UInt8Array(piece.audio.base64, 'elevenLabsSpeakText');
          void AudioPlayer.playBuffer(audioArray.buffer); // fire/forget - it's a single piece of audio (could be long tho)
          playbackStarted = true;
        } catch (audioError) {
          console.error('ElevenLabs audio buffer error:', audioError);
          return { success: false };
        }
      }

      // Errors: reported in-band by the server; abort on the first one
      else if (piece.errorMessage) {
        console.error('ElevenLabs error:', piece.errorMessage);
        return { success: false };
      } else if (piece.warningMessage) {
        console.warn('ElevenLabs warning:', piece.warningMessage);
        // Continue processing warnings
      } else if (piece.control === 'start' || piece.control === 'end') {
        // Control messages - continue processing
      } else {
        console.log('ElevenLabs unknown piece:', piece);
      }
    }

    // success only if at least one piece of audio reached a player
    return { success: playbackStarted, audioBase64 };

  } catch (error) {
    console.error('ElevenLabs playback error:', error);
    return { success: false };
  }
}
-277
View File
@@ -1,277 +0,0 @@
import * as z from 'zod/v4';
import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
import { env } from '~/server/env.server';
import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
// configuration
const SAFETY_TEXT_LENGTH = 1000; // hard cap on synthesized text (chars); longer input is truncated with a warning
const MIN_CHUNK_SIZE = 4096; // Minimum chunk size in bytes

// Schema definitions

// Input for the streaming 'speech' mutation
export type SpeechInputSchema = z.infer<typeof speechInputSchema>;
export const speechInputSchema = z.object({
  xiKey: z.string().optional(), // client API key; server env key is used when absent
  voiceId: z.string().optional(), // voice override; env/default voice is used when absent
  text: z.string(),
  nonEnglish: z.boolean(),
  audioStreaming: z.boolean(),
  audioTurbo: z.boolean(),
});

// Normalized voice descriptor returned by 'listVoices'
export type VoiceSchema = z.infer<typeof voiceSchema>;
const voiceSchema = z.object({
  id: z.string(),
  name: z.string(),
  description: z.string().nullable(),
  previewUrl: z.string().nullable(),
  category: z.string(),
  default: z.boolean(), // true only for the first voice of the sorted list
});
export const elevenlabsRouter = createTRPCRouter({

  /**
   * List Voices available to this API key
   */
  listVoices: publicProcedure
    .input(z.object({
      elevenKey: z.string().optional(),
    }))
    .output(z.object({
      voices: z.array(voiceSchema),
    }))
    .query(async ({ input }) => {
      const { elevenKey } = input;

      // fetch the upstream voices list (throws a TRPCError on transport/API failure)
      const { headers, url } = elevenlabsAccess(elevenKey, '/v1/voices');
      const voicesList = await fetchJsonOrTRPCThrow<ElevenlabsWire.VoicesList>({
        url,
        headers,
        name: 'ElevenLabs',
      });

      // bring category != 'premade' to the top
      voicesList.voices.sort((a, b) => {
        if (a.category === 'premade' && b.category !== 'premade') return 1;
        if (a.category !== 'premade' && b.category === 'premade') return -1;
        return 0;
      });

      // map the wire format to VoiceSchema; the first (sorted) voice is flagged as default
      return {
        voices: voicesList.voices.map((voice, idx) => ({
          id: voice.voice_id,
          name: voice.name,
          description: voice.description,
          previewUrl: voice.preview_url,
          category: voice.category,
          default: idx === 0,
        })),
      };
    }),

  /**
   * Speech synthesis procedure using tRPC streaming
   *
   * Yields { control: 'start' } first, then either accumulated { audioChunk }
   * pieces (streaming mode) or a single full { audio } piece, and finally
   * { control: 'end' }. Failures are reported in-band via { errorMessage },
   * truncation via { warningMessage }.
   */
  speech: publicProcedure
    .input(speechInputSchema)
    .mutation(async function* ({ input: { xiKey, text, voiceId, nonEnglish, audioStreaming, audioTurbo }, ctx }) {

      // start streaming back
      yield { control: 'start' };

      // Safety check: trim text that's too long
      if (text.length > SAFETY_TEXT_LENGTH) {
        text = text.slice(0, SAFETY_TEXT_LENGTH);
        yield { warningMessage: 'text was truncated to maximum length' };
      }

      let response: Response;
      try {
        // Prepare the upstream request
        const path = `/v1/text-to-speech/${elevenlabsVoiceId(voiceId)}${audioStreaming ? '/stream' : ''}`;
        const { headers, url } = elevenlabsAccess(xiKey, path);
        const body: ElevenlabsWire.TTSRequest = {
          text: text,
          // NOTE(review): the nonEnglish arm is currently redundant (both non-turbo
          // arms select the same multilingual model) — deliberate, per the comment below
          model_id:
            audioTurbo ? 'eleven_turbo_v2_5'
              : nonEnglish ? 'eleven_multilingual_v2'
                : 'eleven_multilingual_v2', // even for english, use the latest multilingual model
        };

        // Blocking fetch
        response = await fetchResponseOrTRPCThrow({ url, method: 'POST', headers, body, signal: ctx.reqSignal, name: 'ElevenLabs' });
      } catch (error: any) {
        yield { errorMessage: `fetch issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // Parse headers
      const responseHeaders = _safeParseTTSResponseHeaders(response.headers);

      // If not streaming, return the entire audio
      if (!audioStreaming) {
        const audioArrayBuffer = await response.arrayBuffer();
        yield {
          audio: {
            base64: Buffer.from(audioArrayBuffer).toString('base64'),
            contentType: responseHeaders.contentType,
            characterCost: responseHeaders.characterCost,
            ttsLatencyMs: responseHeaders.ttsLatencyMs,
          },
        };
        yield { control: 'end' };
        return;
      }

      const reader = response.body?.getReader();
      if (!reader) {
        yield { errorMessage: 'stream issue: No reader' };
        return;
      }

      // STREAM the audio chunks back to the client
      try {
        // Initialize a buffer to accumulate chunks
        const accumulatedChunks: Uint8Array[] = [];
        let accumulatedSize = 0;

        // Read loop
        while (true) {
          const { value, done: readerDone } = await reader.read();
          if (readerDone) break;
          if (!value) continue;

          // Accumulate chunks
          accumulatedChunks.push(value);
          accumulatedSize += value.length;

          // When accumulated size reaches or exceeds MIN_CHUNK_SIZE, yield the chunk
          if (accumulatedSize >= MIN_CHUNK_SIZE) {
            yield {
              audioChunk: {
                base64: Buffer.concat(accumulatedChunks).toString('base64'),
              },
            };
            // Reset the accumulation
            accumulatedChunks.length = 0;
            accumulatedSize = 0;
          }
        }

        // If there's any remaining data, yield it as well
        if (accumulatedSize) {
          yield {
            audioChunk: {
              base64: Buffer.concat(accumulatedChunks).toString('base64'),
            },
          };
        }
      } catch (error: any) {
        yield { errorMessage: `stream issue: ${error.message || 'Unknown error'}` };
        return;
      }

      // end streaming (if a control error wasn't thrown)
      yield { control: 'end' };
    }),

});
/**
 * Helper function to construct ElevenLabs API access details.
 *
 * Resolves the API key (caller-provided wins, else the server env var) and the
 * API host (env override, else the official endpoint), then builds the request
 * headers and the full URL.
 * @throws Error when no API key is available at all
 */
export function elevenlabsAccess(elevenKey: string | undefined, apiPath: string): { headers: HeadersInit; url: string } {

  // resolve and trim the API key
  const apiKey = (elevenKey || env.ELEVENLABS_API_KEY || '').trim();
  if (!apiKey)
    throw new Error('Missing ElevenLabs API key.');

  // resolve the host: default scheme to https, avoid a double slash at the join
  const rawHost = (env.ELEVENLABS_API_HOST || 'api.elevenlabs.io').trim();
  let host = rawHost.startsWith('http') ? rawHost : `https://${rawHost}`;
  if (host.endsWith('/') && apiPath.startsWith('/'))
    host = host.slice(0, -1);

  return {
    headers: {
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json',
      'xi-api-key': apiKey,
    },
    url: host + apiPath,
  };
}
// Voice selection precedence: explicit argument > env default > fixed fallback voice id
export function elevenlabsVoiceId(voiceId?: string): string {
  const requested = voiceId?.trim();
  if (requested)
    return requested;
  return env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
}
// Extracts the known ElevenLabs TTS response headers, defaulting every
// missing value (content type to 'audio/mpeg', numeric metrics to 0).
function _safeParseTTSResponseHeaders(headers: Headers): ElevenlabsWire.TTSResponseHeaders {
  const intHeader = (name: string) => parseInt(headers.get(name) || '0');
  return {
    contentType: headers.get('content-type') || 'audio/mpeg',
    characterCost: intHeader('character-cost'),
    currentConcurrentRequests: intHeader('current-concurrent-requests'),
    maximumConcurrentRequests: intHeader('maximum-concurrent-requests'),
    ttsLatencyMs: intHeader('tts-latency-ms'),
  };
}
/// This is the upstream API [rev-eng on 2023-04-12]
export namespace ElevenlabsWire {

  // POST body for /v1/text-to-speech/{voiceId}[/stream]
  export interface TTSRequest {
    text: string;
    model_id?:
      | 'eleven_monolingual_v1'
      | 'eleven_multilingual_v1'
      | 'eleven_multilingual_v2'
      | 'eleven_turbo_v2'
      | 'eleven_turbo_v2_5';
    voice_settings?: {
      stability: number;
      similarity_boost: number;
    };
  }

  // Parsed subset of the TTS response headers
  export interface TTSResponseHeaders {
    // Response metadata
    contentType: string; // Should be 'audio/mpeg'

    // Cost and usage metrics
    characterCost: number; // Cost in characters for this generation
    currentConcurrentRequests: number; // Current number of concurrent requests
    maximumConcurrentRequests: number; // Maximum allowed concurrent requests
    ttsLatencyMs?: number; // Time taken to generate speech (not in streaming mode)
  }

  // GET /v1/voices response shape
  export interface VoicesList {
    voices: Voice[];
  }

  interface Voice {
    voice_id: string;
    name: string;
    //samples: Sample[];
    category: string;
    // fine_tuning: FineTuning;
    labels: Record<string, string>;
    description: string;
    preview_url: string;
    // available_for_tiers: string[];
    settings: {
      stability: number;
      similarity_boost: number;
    };
  }
}
@@ -1,50 +0,0 @@
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
import { useShallow } from 'zustand/react/shallow';
// Client-side settings for the ElevenLabs TTS module: API key and selected voice.
interface ModuleElevenlabsStore {

  // ElevenLabs Text to Speech settings
  elevenLabsApiKey: string;
  setElevenLabsApiKey: (apiKey: string) => void;

  elevenLabsVoiceId: string;
  setElevenLabsVoiceId: (voiceId: string) => void;

}

// Zustand store, persisted (zustand 'persist' middleware) under the 'app-module-elevenlabs' key.
const useElevenlabsStore = create<ModuleElevenlabsStore>()(
  persist(
    (set) => ({

      // ElevenLabs Text to Speech settings
      elevenLabsApiKey: '',
      setElevenLabsApiKey: (elevenLabsApiKey: string) => set({ elevenLabsApiKey }),

      elevenLabsVoiceId: '',
      setElevenLabsVoiceId: (elevenLabsVoiceId: string) => set({ elevenLabsVoiceId }),

    }),
    {
      name: 'app-module-elevenlabs',
    }),
);
// [apiKey, setApiKey] hook; the setter is the store action (stable identity across renders)
export const useElevenLabsApiKey = (): [string, (apiKey: string) => void] => [
  useElevenlabsStore(store => store.elevenLabsApiKey),
  useElevenlabsStore.getState().setElevenLabsApiKey,
];

// [voiceId, setVoiceId] hook, same shape as the API key hook
export const useElevenLabsVoiceId = (): [string, (voiceId: string) => void] => [
  useElevenlabsStore(store => store.elevenLabsVoiceId),
  useElevenlabsStore.getState().setElevenLabsVoiceId,
];

// reactive [apiKey, voiceId] pair, shallow-compared to avoid spurious re-renders
export const useElevenLabsData = (): [string, string] => {
  return useElevenlabsStore(useShallow(store => [store.elevenLabsApiKey, store.elevenLabsVoiceId]));
};

// non-reactive snapshot accessor, for use outside React components
export const getElevenLabsData = (): { elevenLabsApiKey: string, elevenLabsVoiceId: string } => {
  return useElevenlabsStore.getState();
};
@@ -1,102 +0,0 @@
import * as React from 'react';
import { CircularProgress, Option, Select } from '@mui/joy';
import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown';
import RecordVoiceOverTwoToneIcon from '@mui/icons-material/RecordVoiceOverTwoTone';
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
import { apiQuery } from '~/common/util/trpc.client';
import { VoiceSchema } from './elevenlabs.router';
import { isElevenLabsEnabled } from './elevenlabs.client';
import { useElevenLabsApiKey, useElevenLabsVoiceId } from './store-module-elevenlabs';
/**
 * Voice picker <Select>: shows a spinner while fetching, and a placeholder
 * reflecting the error / ready / missing-key states.
 */
function VoicesDropdown(props: {
  isValidKey: boolean,
  isFetchingVoices: boolean,
  isErrorVoices: boolean,
  disabled?: boolean,
  voices: VoiceSchema[],
  voiceId: string | null,
  setVoiceId: (voiceId: string) => void,
}) {

  const onSelectionChange = (_event: any, value: string | null) => props.setVoiceId(value || '');

  // placeholder precedence: error > ready > missing key
  const placeholder = props.isErrorVoices ? 'Issue loading voices' : props.isValidKey ? 'Select a voice' : 'Missing API Key';

  return (
    <Select
      value={props.voiceId} onChange={onSelectionChange}
      variant='outlined' disabled={props.disabled || !props.voices.length}
      // color={props.isErrorVoices ? 'danger' : undefined}
      placeholder={placeholder}
      startDecorator={<RecordVoiceOverTwoToneIcon />}
      endDecorator={props.isValidKey && props.isFetchingVoices && <CircularProgress size='sm' />}
      indicator={<KeyboardArrowDownIcon />}
      slotProps={{
        root: { sx: { width: '100%' } },
        indicator: { sx: { opacity: 0.5 } },
      }}
    >
      {props.voices.map(voice => (
        <Option key={voice.id} value={voice.id}>
          {voice.name}
        </Option>
      ))}
    </Select>
  );
}
/**
 * Fetches the ElevenLabs voices list via tRPC, enabled only when the module
 * is configured (valid client key or server-side key).
 */
export function useElevenLabsVoices() {
  const [apiKey] = useElevenLabsApiKey();

  const isConfigured = isElevenLabsEnabled(apiKey);

  const { data, isError, isFetching, isPending } = apiQuery.elevenlabs.listVoices.useQuery({ elevenKey: apiKey }, {
    enabled: isConfigured,
    staleTime: 1000 * 60 * 5, // 5 minutes
  });

  const voices = data?.voices ?? [];
  return {
    isConfigured,
    isError,
    isFetching,
    hasVoices: !isPending && voices.length > 0,
    voices,
  };
}
/**
 * Provides a memoized voices dropdown component plus the selected-voice state.
 * @param autoSpeak when true, plays the selected voice's preview URL on change
 * @param disabled disables the dropdown control
 */
export function useElevenLabsVoiceDropdown(autoSpeak: boolean, disabled?: boolean) {

  // external state
  const { isConfigured, isError, isFetching, hasVoices, voices } = useElevenLabsVoices();
  const [voiceId, setVoiceId] = useElevenLabsVoiceId();

  // derived state
  const voice: VoiceSchema | undefined = voices.find(voice => voice.id === voiceId);

  // [E] autoSpeak
  // play a preview whenever the (autoSpeak-gated) preview URL changes
  const previewUrl = (autoSpeak && voice?.previewUrl) || null;
  React.useEffect(() => {
    if (previewUrl)
      void AudioPlayer.playUrl(previewUrl);
  }, [previewUrl]);

  // memoized so consumers don't re-render the Select on unrelated state changes
  const voicesDropdown = React.useMemo(() =>
      <VoicesDropdown
        isValidKey={isConfigured} isFetchingVoices={isFetching} isErrorVoices={isError} disabled={disabled}
        voices={voices}
        voiceId={voiceId} setVoiceId={setVoiceId}
      />,
    [disabled, isConfigured, isError, isFetching, setVoiceId, voiceId, voices],
  );

  return {
    hasVoices,
    voiceId,
    voiceName: voice?.name,
    voicesDropdown,
  };
}