diff --git a/pages/info/debug.tsx b/pages/info/debug.tsx index 00fa9c429..76ef63303 100644 --- a/pages/info/debug.tsx +++ b/pages/info/debug.tsx @@ -18,7 +18,7 @@ import { ROUTE_APP_CHAT, ROUTE_INDEX } from '~/common/app.routes'; import { Release } from '~/common/app.release'; // capabilities access -import { useCapabilityBrowserSpeechRecognition, useCapabilityElevenLabs, useCapabilityTextToImage } from '~/common/components/useCapabilities'; +import { useCapabilityBrowserSpeechRecognition, useCapabilityTextToImage } from '~/common/components/useCapabilities'; // stores access import { getLLMsDebugInfo } from '~/common/stores/llms/store-llms'; @@ -95,7 +95,6 @@ function AppDebug() { const cProduct = { capabilities: { mic: useCapabilityBrowserSpeechRecognition(), - elevenLabs: useCapabilityElevenLabs(), textToImage: useCapabilityTextToImage(), }, models: getLLMsDebugInfo(), diff --git a/src/apps/call/CallWizard.tsx b/src/apps/call/CallWizard.tsx index 40908e758..33bdc06b3 100644 --- a/src/apps/call/CallWizard.tsx +++ b/src/apps/call/CallWizard.tsx @@ -8,11 +8,13 @@ import CloseRoundedIcon from '@mui/icons-material/CloseRounded'; import MicIcon from '@mui/icons-material/Mic'; import WarningRoundedIcon from '@mui/icons-material/WarningRounded'; +import { useSpeexGlobalEngine } from '~/modules/speex/store-module-speex'; + import { PhVoice } from '~/common/components/icons/phosphor/PhVoice'; import { animationColorRainbow } from '~/common/util/animUtils'; import { navigateBack } from '~/common/app.routes'; import { optimaOpenPreferences } from '~/common/layout/optima/useOptima'; -import { useCapabilityBrowserSpeechRecognition, useCapabilityElevenLabs } from '~/common/components/useCapabilities'; +import { useCapabilityBrowserSpeechRecognition } from '~/common/components/useCapabilities'; import { useChatStore } from '~/common/stores/chat/store-chats'; import { useUICounter } from '~/common/stores/store-ui'; @@ -45,7 +47,7 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n // external state const recognition = useCapabilityBrowserSpeechRecognition(); - const synthesis = useCapabilityElevenLabs(); + const speexGlobalEngine = useSpeexGlobalEngine(); const chatIsEmpty = useChatStore(state => { if (!props.conversationId) return false; @@ -58,15 +60,16 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n const outOfTheBlue = !props.conversationId; const overriddenEmptyChat = chatEmptyOverride || !chatIsEmpty; const overriddenRecognition = recognitionOverride || recognition.mayWork; - const allGood = overriddenEmptyChat && overriddenRecognition && synthesis.mayWork; - const fatalGood = overriddenRecognition && synthesis.mayWork; + const synthesisShallWork = !!speexGlobalEngine; + const allGood = overriddenEmptyChat && overriddenRecognition && synthesisShallWork; + const fatalGood = overriddenRecognition && synthesisShallWork; const handleOverrideChatEmpty = React.useCallback(() => setChatEmptyOverride(true), []); const handleOverrideRecognition = React.useCallback(() => setRecognitionOverride(true), []); - const handleConfigureElevenLabs = React.useCallback(() => optimaOpenPreferences('voice'), []); + const handleConfigureVoice = React.useCallback(() => optimaOpenPreferences('voice'), []); const handleFinishButton = React.useCallback(() => { if (!allGood) @@ -130,15 +133,15 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n } text={ - (synthesis.mayWork ? 'Voice synthesis should be ready.' : 'There might be an issue with ElevenLabs voice synthesis.') - + (synthesis.isConfiguredServerSide ? '' : (synthesis.isConfiguredClientSide ? '' : ' Please add your API key in the settings.')) + (synthesisShallWork ? 'Voice synthesis should be ready.' : 'There might be an issue with voice synthesis.') + // + (synthesis.isConfiguredServerSide ? '' : (synthesis.isConfiguredClientSide ? '' : ' Please add your API key in the settings.')) } - button={synthesis.mayWork ? undefined : ( - )} - hasIssue={!synthesis.mayWork} + hasIssue={!synthesisShallWork} /> {/**/} diff --git a/src/apps/call/Contacts.tsx b/src/apps/call/Contacts.tsx index e1c27e3ea..35fbf0fd4 100644 --- a/src/apps/call/Contacts.tsx +++ b/src/apps/call/Contacts.tsx @@ -317,7 +317,7 @@ export function Contacts(props: { setCallIntent: (intent: AppCallIntent) => void issue={354} text='Call App: Support thread and compatibility matrix' note={<> - Voice input uses the HTML Web Speech API, and speech output requires an ElevenLabs API Key. + Voice input uses the HTML Web Speech API. } // note2='Please report any issues you encounter' sx={{ diff --git a/src/apps/call/Telephone.tsx b/src/apps/call/Telephone.tsx index d9e1aef5e..dfed73e55 100644 --- a/src/apps/call/Telephone.tsx +++ b/src/apps/call/Telephone.tsx @@ -13,9 +13,9 @@ import { ScrollToBottomButton } from '~/common/scroll-to-bottom/ScrollToBottomBu import { useChatLLMDropdown } from '../chat/components/layout-bar/useLLMDropdown'; import { SystemPurposeId, SystemPurposes } from '../../data'; -import { elevenLabsSpeakText } from '~/modules/elevenlabs/elevenlabs.client'; -import { AixChatGenerateContent_DMessageGuts, aixChatGenerateContent_DMessage_FromConversation } from '~/modules/aix/client/aix.client'; -import { useElevenLabsVoiceDropdown } from '~/modules/elevenlabs/useElevenLabsVoiceDropdown'; + +import { aixChatGenerateContent_DMessage_FromConversation, AixChatGenerateContent_DMessageGuts } from '~/modules/aix/client/aix.client'; +import { speakText } from '~/modules/speex/speex.client'; import type { OptimaBarControlMethods } from '~/common/layout/optima/bar/OptimaBarDropdown'; import { AudioPlayer } from '~/common/util/audio/AudioPlayer'; @@ -43,18 +43,13 @@ import { useAppCallStore } from './state/store-app-call'; function CallMenu(props: { pushToTalk: boolean, setPushToTalk: (pushToTalk: boolean) => void, - override: boolean, - setOverride: (overridePersonaVoice: boolean) => void, }) { // external state const { grayUI, toggleGrayUI } = useAppCallStore(); - const { voicesDropdown } = useElevenLabsVoiceDropdown(false, !props.override); const handlePushToTalkToggle = () => props.setPushToTalk(!props.pushToTalk); - const handleChangeVoiceToggle = () => props.setOverride(!props.override); - return @@ -63,17 +58,6 @@ function CallMenu(props: { - - - Change Voice - - - - - {' '} - {voicesDropdown} - - @@ -98,7 +82,6 @@ export function Telephone(props: { const [avatarClickCount, setAvatarClickCount] = React.useState(0);// const [micMuted, setMicMuted] = React.useState(false); const [callElapsedTime, setCallElapsedTime] = React.useState('00:00'); const [callMessages, setCallMessages] = React.useState([]); - const [overridePersonaVoice, setOverridePersonaVoice] = React.useState(false); const [personaTextInterim, setPersonaTextInterim] = React.useState(null); const [pushToTalk, setPushToTalk] = React.useState(true); const [stage, setStage] = React.useState<'ring' | 'declined' | 'connected' | 'ended'>('ring'); @@ -118,7 +101,7 @@ export function Telephone(props: { })); const persona = SystemPurposes[props.callIntent.personaId as SystemPurposeId] ?? undefined; const personaCallStarters = persona?.call?.starters ?? undefined; - const personaVoiceId = overridePersonaVoice ? undefined : (persona?.voices?.elevenLabs?.voiceId ?? undefined); + // const personaVoiceSelector = React.useMemo(() => personaGetVoiceSelector(persona), [persona]); const personaSystemMessage = persona?.systemMessage ?? undefined; // hooks and speech @@ -165,7 +148,6 @@ export function Telephone(props: { }; // [E] pickup -> seed message and call timer - // FIXME: Overriding the voice will reset the call - not a desired behavior React.useEffect(() => { if (!isConnected) return; @@ -185,11 +167,14 @@ export function Telephone(props: { setCallMessages([createDMessageTextContent('assistant', firstMessage)]); // [state] set assistant:hello message - // fire/forget - void elevenLabsSpeakText(firstMessage, personaVoiceId, true, true); + // fire/forget - use 'fast' priority for real-time conversation + void speakText(firstMessage, + undefined, + { label: 'Call', priority: 'fast' }, + ); return () => clearInterval(interval); - }, [isConnected, personaCallStarters, personaVoiceId]); + }, [isConnected, personaCallStarters]); // [E] persona streaming response - upon new user message React.useEffect(() => { @@ -270,9 +255,12 @@ export function Telephone(props: { fullMessage.generator = status.lastDMessage.generator; setCallMessages(messages => [...messages, fullMessage]); // [state] append assistant:call_response - // fire/forget + // fire/forget - use 'fast' priority for real-time conversation if (status.outcome === 'success' && finalText?.length >= 1) - void elevenLabsSpeakText(finalText, personaVoiceId, true, true); + void speakText(finalText, + undefined, + { label: 'Call', priority: 'fast' }, + ); }).catch((err: DOMException) => { if (err?.name !== 'AbortError') { @@ -288,7 +276,7 @@ export function Telephone(props: { responseAbortController.current?.abort(); responseAbortController.current = null; }; - }, [isConnected, callMessages, modelId, personaVoiceId, personaSystemMessage, reMessages]); + }, [callMessages, isConnected, modelId, personaSystemMessage, reMessages]); // [E] Message interrupter const abortTrigger = isConnected && recognitionState.hasSpeech; @@ -325,7 +313,6 @@ export function Telephone(props: {