From ffc1bf9c580bbf7b3e6ed325bcec76d92567bf96 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Sun, 30 Nov 2025 06:12:20 -0800
Subject: [PATCH] Remove src/modules/elevenlabs

---
 src/modules/elevenlabs/ElevenlabsSettings.tsx |  67 -----
 src/modules/elevenlabs/elevenlabs.client.ts   | 126 --------
 src/modules/elevenlabs/elevenlabs.router.ts   | 277 ------------------
 .../elevenlabs/store-module-elevenlabs.ts     |  50 ----
 .../elevenlabs/useElevenLabsVoiceDropdown.tsx | 102 -------
 5 files changed, 622 deletions(-)
 delete mode 100644 src/modules/elevenlabs/ElevenlabsSettings.tsx
 delete mode 100644 src/modules/elevenlabs/elevenlabs.client.ts
 delete mode 100644 src/modules/elevenlabs/elevenlabs.router.ts
 delete mode 100644 src/modules/elevenlabs/store-module-elevenlabs.ts
 delete mode 100644 src/modules/elevenlabs/useElevenLabsVoiceDropdown.tsx

diff --git a/src/modules/elevenlabs/ElevenlabsSettings.tsx b/src/modules/elevenlabs/ElevenlabsSettings.tsx
deleted file mode 100644
index 2bfd3c613..000000000
--- a/src/modules/elevenlabs/ElevenlabsSettings.tsx
+++ /dev/null
@@ -1,67 +0,0 @@
-import * as React from 'react';
-
-import { FormControl } from '@mui/joy';
-
-import { useChatAutoAI } from '../../apps/chat/store-app-chat';
-
-import { AlreadySet } from '~/common/components/AlreadySet';
-import { FormInputKey } from '~/common/components/forms/FormInputKey';
-import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
-import { FormRadioControl } from '~/common/components/forms/FormRadioControl';
-import { useCapabilityElevenLabs } from '~/common/components/useCapabilities';
-
-import { isElevenLabsEnabled } from './elevenlabs.client';
-import { useElevenLabsVoiceDropdown, useElevenLabsVoices } from './useElevenLabsVoiceDropdown';
-import { useElevenLabsApiKey } from './store-module-elevenlabs';
-
-
-export function ElevenlabsSettings() {
-
-  // state
-  const [apiKey, setApiKey] = useElevenLabsApiKey();
-
-  // external state
-  const { autoSpeak, setAutoSpeak } = useChatAutoAI();
-  const { hasVoices } = useElevenLabsVoices();
-  const { isConfiguredServerSide } = useCapabilityElevenLabs();
-  const { voicesDropdown } = useElevenLabsVoiceDropdown(true);
-
-
-  // derived state
-  const isValidKey = isElevenLabsEnabled(apiKey);
-
-
-  return <>
-
-    {/**/}
-    {/*  📢 Hear AI responses, even in your own voice*/}
-    {/**/}
-
-
-
-
-    {!isConfiguredServerSide && }
-      value={apiKey} onChange={setApiKey}
-      required={!isConfiguredServerSide} isError={!isValidKey}
-    />}
-
-
-
-    {voicesDropdown}
-
-
-  ;
-}
\ No newline at end of file
diff --git a/src/modules/elevenlabs/elevenlabs.client.ts b/src/modules/elevenlabs/elevenlabs.client.ts
deleted file mode 100644
index 1fddc9f21..000000000
--- a/src/modules/elevenlabs/elevenlabs.client.ts
+++ /dev/null
@@ -1,126 +0,0 @@
-import { getBackendCapabilities } from '~/modules/backend/store-backend-capabilities';
-
-import { AudioLivePlayer } from '~/common/util/audio/AudioLivePlayer';
-import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
-import { CapabilityElevenLabsSpeechSynthesis } from '~/common/components/useCapabilities';
-import { apiStream } from '~/common/util/trpc.client';
-import { convert_Base64_To_UInt8Array } from '~/common/util/blobUtils';
-import { useUIPreferencesStore } from '~/common/stores/store-ui';
-
-import { getElevenLabsData, useElevenLabsData } from './store-module-elevenlabs';
-
-
-export const isValidElevenLabsApiKey = (apiKey?: string) => !!apiKey && apiKey.trim()?.length >= 32;
-
-export const isElevenLabsEnabled = (apiKey?: string) =>
-  apiKey ? isValidElevenLabsApiKey(apiKey)
-    : getBackendCapabilities().hasVoiceElevenLabs;
-
-
-export function useCapability(): CapabilityElevenLabsSpeechSynthesis {
-  const [clientApiKey, voiceId] = useElevenLabsData();
-  const isConfiguredServerSide = getBackendCapabilities().hasVoiceElevenLabs;
-  const isConfiguredClientSide = clientApiKey ? isValidElevenLabsApiKey(clientApiKey) : false;
-  const mayWork = isConfiguredServerSide || isConfiguredClientSide || !!voiceId;
-  return { mayWork, isConfiguredServerSide, isConfiguredClientSide };
-}
-
-
-interface ElevenLabsSpeakResult {
-  success: boolean;
-  audioBase64?: string; // Available when not streaming
-}
-
-
-/**
- * Speaks text using ElevenLabs TTS
- * @returns Object with success status and optionally the audio base64 (when not streaming)
- */
-export async function elevenLabsSpeakText(text: string, voiceId: string | undefined, audioStreaming: boolean, audioTurbo: boolean): Promise {
-  // Early validation
-  if (!(text?.trim())) {
-    // console.log('ElevenLabs: No text to speak');
-    return { success: false };
-  }
-
-  const { elevenLabsApiKey, elevenLabsVoiceId } = getElevenLabsData();
-  if (!isElevenLabsEnabled(elevenLabsApiKey)) {
-    // console.warn('ElevenLabs: Service not enabled or configured');
-    return { success: false };
-  }
-
-  const { preferredLanguage } = useUIPreferencesStore.getState();
-  const nonEnglish = !(preferredLanguage?.toLowerCase()?.startsWith('en'));
-
-  // audio live player instance, if needed
-  let liveAudioPlayer: AudioLivePlayer | undefined;
-  let playbackStarted = false;
-  let audioBase64: string | undefined;
-
-  try {
-
-    const stream = await apiStream.elevenlabs.speech.mutate({
-      xiKey: elevenLabsApiKey,
-      voiceId: voiceId || elevenLabsVoiceId,
-      text: text,
-      nonEnglish,
-      audioStreaming,
-      audioTurbo,
-    });
-
-    for await (const piece of stream) {
-
-      // ElevenLabs stream buffer
-      if (piece.audioChunk) {
-        try {
-          // create the live audio player as needed
-          // NOTE: in the future we can have a centralized audio playing system
-          if (!liveAudioPlayer)
-            liveAudioPlayer = new AudioLivePlayer();
-
-          // enqueue a decoded audio chunk - this will throw on malformed base64 data
-          const chunkArray = convert_Base64_To_UInt8Array(piece.audioChunk.base64, 'elevenLabsSpeakText (chunk)');
-          liveAudioPlayer.enqueueChunk(chunkArray.buffer);
-          playbackStarted = true;
-        } catch (audioError) {
-          console.error('ElevenLabs audio chunk error:', audioError);
-          return { success: false };
-        }
-      }
-
-      // ElevenLabs full audio buffer
-      else if (piece.audio) {
-        try {
-          // return base64 for potential reuse
-          if (!audioStreaming)
-            audioBase64 = piece.audio.base64;
-
-          // also consider merging LiveAudioPlayer into AudioPlayer - note this will throw on malformed base64 data
-          const audioArray = convert_Base64_To_UInt8Array(piece.audio.base64, 'elevenLabsSpeakText');
-          void AudioPlayer.playBuffer(audioArray.buffer); // fire/forget - it's a single piece of audio (could be long tho)
-          playbackStarted = true;
-        } catch (audioError) {
-          console.error('ElevenLabs audio buffer error:', audioError);
-          return { success: false };
-        }
-      }
-
-      // Errors
-      else if (piece.errorMessage) {
-        console.error('ElevenLabs error:', piece.errorMessage);
-        return { success: false };
-      } else if (piece.warningMessage) {
-        console.warn('ElevenLabs warning:', piece.warningMessage);
-        // Continue processing warnings
-      } else if (piece.control === 'start' || piece.control === 'end') {
-        // Control messages - continue processing
-      } else {
-        console.log('ElevenLabs unknown piece:', piece);
-      }
-    }
-    return { success: playbackStarted, audioBase64 };
-  } catch (error) {
-    console.error('ElevenLabs playback error:', error);
-    return { success: false };
-  }
-}
diff --git a/src/modules/elevenlabs/elevenlabs.router.ts b/src/modules/elevenlabs/elevenlabs.router.ts
deleted file mode 100644
index 894be50d0..000000000
--- a/src/modules/elevenlabs/elevenlabs.router.ts
+++ /dev/null
@@ -1,277 +0,0 @@
-import * as z from 'zod/v4';
-
-import { createTRPCRouter, publicProcedure } from '~/server/trpc/trpc.server';
-import { env } from '~/server/env.server';
-import { fetchJsonOrTRPCThrow, fetchResponseOrTRPCThrow } from '~/server/trpc/trpc.router.fetchers';
-
-
-// configuration
-const SAFETY_TEXT_LENGTH = 1000;
-const MIN_CHUNK_SIZE = 4096; // Minimum chunk size in bytes
-
-
-// Schema definitions
-export type SpeechInputSchema = z.infer;
-export const speechInputSchema = z.object({
-  xiKey: z.string().optional(),
-  voiceId: z.string().optional(),
-  text: z.string(),
-  nonEnglish: z.boolean(),
-  audioStreaming: z.boolean(),
-  audioTurbo: z.boolean(),
-});
-
-export type VoiceSchema = z.infer;
-const voiceSchema = z.object({
-  id: z.string(),
-  name: z.string(),
-  description: z.string().nullable(),
-  previewUrl: z.string().nullable(),
-  category: z.string(),
-  default: z.boolean(),
-});
-
-
-export const elevenlabsRouter = createTRPCRouter({
-
-  /**
-   * List Voices available to this API key
-   */
-  listVoices: publicProcedure
-    .input(z.object({
-      elevenKey: z.string().optional(),
-    }))
-    .output(z.object({
-      voices: z.array(voiceSchema),
-    }))
-    .query(async ({ input }) => {
-
-      const { elevenKey } = input;
-      const { headers, url } = elevenlabsAccess(elevenKey, '/v1/voices');
-
-      const voicesList = await fetchJsonOrTRPCThrow({
-        url,
-        headers,
-        name: 'ElevenLabs',
-      });
-
-      // bring category != 'premade' to the top
-      voicesList.voices.sort((a, b) => {
-        if (a.category === 'premade' && b.category !== 'premade') return 1;
-        if (a.category !== 'premade' && b.category === 'premade') return -1;
-        return 0;
-      });
-
-      return {
-        voices: voicesList.voices.map((voice, idx) => ({
-          id: voice.voice_id,
-          name: voice.name,
-          description: voice.description,
-          previewUrl: voice.preview_url,
-          category: voice.category,
-          default: idx === 0,
-        })),
-      };
-
-    }),
-
-  /**
-   * Speech synthesis procedure using tRPC streaming
-   */
-  speech: publicProcedure
-    .input(speechInputSchema)
-    .mutation(async function* ({ input: { xiKey, text, voiceId, nonEnglish, audioStreaming, audioTurbo }, ctx }) {
-
-      // start streaming back
-      yield { control: 'start' };
-
-      // Safety check: trim text that's too long
-      if (text.length > SAFETY_TEXT_LENGTH) {
-        text = text.slice(0, SAFETY_TEXT_LENGTH);
-        yield { warningMessage: 'text was truncated to maximum length' };
-      }
-
-      let response: Response;
-      try {
-
-        // Prepare the upstream request
-        const path = `/v1/text-to-speech/${elevenlabsVoiceId(voiceId)}${audioStreaming ? '/stream' : ''}`;
-        const { headers, url } = elevenlabsAccess(xiKey, path);
-        const body: ElevenlabsWire.TTSRequest = {
-          text: text,
-          model_id:
-            audioTurbo ? 'eleven_turbo_v2_5'
-              : nonEnglish ?
-                'eleven_multilingual_v2'
-                : 'eleven_multilingual_v2', // even for english, use the latest multilingual model
-        };
-
-        // Blocking fetch
-        response = await fetchResponseOrTRPCThrow({ url, method: 'POST', headers, body, signal: ctx.reqSignal, name: 'ElevenLabs' });
-
-      } catch (error: any) {
-        yield { errorMessage: `fetch issue: ${error.message || 'Unknown error'}` };
-        return;
-      }
-
-      // Parse headers
-      const responseHeaders = _safeParseTTSResponseHeaders(response.headers);
-
-      // If not streaming, return the entire audio
-      if (!audioStreaming) {
-        const audioArrayBuffer = await response.arrayBuffer();
-        yield {
-          audio: {
-            base64: Buffer.from(audioArrayBuffer).toString('base64'),
-            contentType: responseHeaders.contentType,
-            characterCost: responseHeaders.characterCost,
-            ttsLatencyMs: responseHeaders.ttsLatencyMs,
-          },
-        };
-        yield { control: 'end' };
-        return;
-      }
-
-      const reader = response.body?.getReader();
-      if (!reader) {
-        yield { errorMessage: 'stream issue: No reader' };
-        return;
-      }
-
-      // STREAM the audio chunks back to the client
-      try {
-
-        // Initialize a buffer to accumulate chunks
-        const accumulatedChunks: Uint8Array[] = [];
-        let accumulatedSize = 0;
-
-        // Read loop
-        while (true) {
-          const { value, done: readerDone } = await reader.read();
-          if (readerDone) break;
-          if (!value) continue;
-
-          // Accumulate chunks
-          accumulatedChunks.push(value);
-          accumulatedSize += value.length;
-
-          // When accumulated size reaches or exceeds MIN_CHUNK_SIZE, yield the chunk
-          if (accumulatedSize >= MIN_CHUNK_SIZE) {
-            yield {
-              audioChunk: {
-                base64: Buffer.concat(accumulatedChunks).toString('base64'),
-              },
-            };
-            // Reset the accumulation
-            accumulatedChunks.length = 0;
-            accumulatedSize = 0;
-          }
-        }
-
-        // If there's any remaining data, yield it as well
-        if (accumulatedSize) {
-          yield {
-            audioChunk: {
-              base64: Buffer.concat(accumulatedChunks).toString('base64'),
-            },
-          };
-        }
-      } catch (error: any) {
-        yield { errorMessage: `stream issue: ${error.message || 'Unknown error'}` };
-        return;
-      }
-
-      // end streaming (if a control error wasn't thrown)
-      yield { control: 'end' };
-    }),
-
-});
-
-/**
- * Helper function to construct ElevenLabs API access details
- */
-export function elevenlabsAccess(elevenKey: string | undefined, apiPath: string): { headers: HeadersInit; url: string } {
-  // API key
-  elevenKey = (elevenKey || env.ELEVENLABS_API_KEY || '').trim();
-  if (!elevenKey)
-    throw new Error('Missing ElevenLabs API key.');
-
-  // API host
-  let host = (env.ELEVENLABS_API_HOST || 'api.elevenlabs.io').trim();
-  if (!host.startsWith('http'))
-    host = `https://${host}`;
-  if (host.endsWith('/') && apiPath.startsWith('/'))
-    host = host.slice(0, -1);
-
-  return {
-    headers: {
-      'Accept': 'audio/mpeg',
-      'Content-Type': 'application/json',
-      'xi-api-key': elevenKey,
-    },
-    url: host + apiPath,
-  };
-}
-
-export function elevenlabsVoiceId(voiceId?: string): string {
-  return voiceId?.trim() || env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
-}
-
-
-function _safeParseTTSResponseHeaders(headers: Headers): ElevenlabsWire.TTSResponseHeaders {
-  return {
-    contentType: headers.get('content-type') || 'audio/mpeg',
-    characterCost: parseInt(headers.get('character-cost') || '0'),
-    currentConcurrentRequests: parseInt(headers.get('current-concurrent-requests') || '0'),
-    maximumConcurrentRequests: parseInt(headers.get('maximum-concurrent-requests') || '0'),
-    ttsLatencyMs: parseInt(headers.get('tts-latency-ms') || '0'),
-  };
-}
-
-
-/// This is the upstream API [rev-eng on 2023-04-12]
-export namespace ElevenlabsWire {
-  export interface TTSRequest {
-    text: string;
-    model_id?:
-      | 'eleven_monolingual_v1'
-      | 'eleven_multilingual_v1'
-      | 'eleven_multilingual_v2'
-      | 'eleven_turbo_v2'
-      | 'eleven_turbo_v2_5';
-    voice_settings?: {
-      stability: number;
-      similarity_boost: number;
-    };
-  }
-
-  export interface TTSResponseHeaders {
-    // Response metadata
-    contentType: string; // Should be 'audio/mpeg'
-
-    // Cost and usage metrics
-    characterCost: number; // Cost in characters for this generation
-    currentConcurrentRequests: number; // Current number of concurrent requests
-    maximumConcurrentRequests: number; // Maximum allowed concurrent requests
-    ttsLatencyMs?: number; // Time taken to generate speech (not in streaming mode)
-  }
-
-  export interface VoicesList {
-    voices: Voice[];
-  }
-
-  interface Voice {
-    voice_id: string;
-    name: string;
-    //samples: Sample[];
-    category: string;
-    // fine_tuning: FineTuning;
-    labels: Record;
-    description: string;
-    preview_url: string;
-    // available_for_tiers: string[];
-    settings: {
-      stability: number;
-      similarity_boost: number;
-    };
-  }
-}
\ No newline at end of file
diff --git a/src/modules/elevenlabs/store-module-elevenlabs.ts b/src/modules/elevenlabs/store-module-elevenlabs.ts
deleted file mode 100644
index a784143a9..000000000
--- a/src/modules/elevenlabs/store-module-elevenlabs.ts
+++ /dev/null
@@ -1,50 +0,0 @@
-import { create } from 'zustand';
-import { persist } from 'zustand/middleware';
-import { useShallow } from 'zustand/react/shallow';
-
-
-interface ModuleElevenlabsStore {
-
-  // ElevenLabs Text to Speech settings
-
-  elevenLabsApiKey: string;
-  setElevenLabsApiKey: (apiKey: string) => void;
-
-  elevenLabsVoiceId: string;
-  setElevenLabsVoiceId: (voiceId: string) => void;
-
-}
-
-const useElevenlabsStore = create()(
-  persist(
-    (set) => ({
-
-      // ElevenLabs Text to Speech settings
-
-      elevenLabsApiKey: '',
-      setElevenLabsApiKey: (elevenLabsApiKey: string) => set({ elevenLabsApiKey }),
-
-      elevenLabsVoiceId: '',
-      setElevenLabsVoiceId: (elevenLabsVoiceId: string) => set({ elevenLabsVoiceId }),
-
-    }),
-    {
-      name: 'app-module-elevenlabs',
-    }),
-);
-
-export const useElevenLabsApiKey = (): [string, (apiKey: string) => void] => {
-  const apiKey = useElevenlabsStore(state => state.elevenLabsApiKey);
-  return [apiKey, useElevenlabsStore.getState().setElevenLabsApiKey];
-};
-
-export const useElevenLabsVoiceId = (): [string, (voiceId: string) => void] => {
-  const voiceId = useElevenlabsStore(state => state.elevenLabsVoiceId);
-  return [voiceId, useElevenlabsStore.getState().setElevenLabsVoiceId];
-};
-
-export const useElevenLabsData = (): [string, string] =>
-  useElevenlabsStore(useShallow(state => [state.elevenLabsApiKey, state.elevenLabsVoiceId]));
-
-export const getElevenLabsData = (): { elevenLabsApiKey: string, elevenLabsVoiceId: string } =>
-  useElevenlabsStore.getState();
diff --git a/src/modules/elevenlabs/useElevenLabsVoiceDropdown.tsx b/src/modules/elevenlabs/useElevenLabsVoiceDropdown.tsx
deleted file mode 100644
index fdfaafe3a..000000000
--- a/src/modules/elevenlabs/useElevenLabsVoiceDropdown.tsx
+++ /dev/null
@@ -1,102 +0,0 @@
-import * as React from 'react';
-
-import { CircularProgress, Option, Select } from '@mui/joy';
-import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown';
-import RecordVoiceOverTwoToneIcon from '@mui/icons-material/RecordVoiceOverTwoTone';
-
-import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
-import { apiQuery } from '~/common/util/trpc.client';
-
-import { VoiceSchema } from './elevenlabs.router';
-import { isElevenLabsEnabled } from './elevenlabs.client';
-import { useElevenLabsApiKey, useElevenLabsVoiceId } from './store-module-elevenlabs';
-
-
-function VoicesDropdown(props: {
-  isValidKey: boolean,
-  isFetchingVoices: boolean,
-  isErrorVoices: boolean,
-  disabled?: boolean,
-  voices: VoiceSchema[],
-  voiceId: string | null,
-  setVoiceId: (voiceId: string) => void,
-}) {
-
-  const handleVoiceChange = (_event: any, value: string | null) => props.setVoiceId(value || '');
-
-  return (
-
-  );
-}
-
-
-export function useElevenLabsVoices() {
-  const [apiKey] = useElevenLabsApiKey();
-
-  const isConfigured = isElevenLabsEnabled(apiKey);
-
-  const { data, isError, isFetching, isPending } = apiQuery.elevenlabs.listVoices.useQuery({ elevenKey: apiKey }, {
-    enabled: isConfigured,
-    staleTime: 1000 * 60 * 5, // 5 minutes
-  });
-
-  return {
-    isConfigured,
-    isError,
-    isFetching,
-    hasVoices: !isPending && !!data?.voices.length,
-    voices: data?.voices || [],
-  };
-}
-
-
-export function useElevenLabsVoiceDropdown(autoSpeak: boolean, disabled?: boolean) {
-
-  // external state
-  const { isConfigured, isError, isFetching, hasVoices, voices } = useElevenLabsVoices();
-  const [voiceId, setVoiceId] = useElevenLabsVoiceId();
-
-  // derived state
-  const voice: VoiceSchema | undefined = voices.find(voice => voice.id === voiceId);
-
-  // [E] autoSpeak
-  const previewUrl = (autoSpeak && voice?.previewUrl) || null;
-  React.useEffect(() => {
-    if (previewUrl)
-      void AudioPlayer.playUrl(previewUrl);
-  }, [previewUrl]);
-
-  const voicesDropdown = React.useMemo(() =>
-      ,
-    [disabled, isConfigured, isError, isFetching, setVoiceId, voiceId, voices],
-  );
-
-  return {
-    hasVoices,
-    voiceId,
-    voiceName: voice?.name,
-    voicesDropdown,
-  };
-}
\ No newline at end of file