mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Call: port to Speex
This commit is contained in:
@@ -18,7 +18,7 @@ import { ROUTE_APP_CHAT, ROUTE_INDEX } from '~/common/app.routes';
|
||||
import { Release } from '~/common/app.release';
|
||||
|
||||
// capabilities access
|
||||
import { useCapabilityBrowserSpeechRecognition, useCapabilityElevenLabs, useCapabilityTextToImage } from '~/common/components/useCapabilities';
|
||||
import { useCapabilityBrowserSpeechRecognition, useCapabilityTextToImage } from '~/common/components/useCapabilities';
|
||||
|
||||
// stores access
|
||||
import { getLLMsDebugInfo } from '~/common/stores/llms/store-llms';
|
||||
@@ -95,7 +95,6 @@ function AppDebug() {
|
||||
const cProduct = {
|
||||
capabilities: {
|
||||
mic: useCapabilityBrowserSpeechRecognition(),
|
||||
elevenLabs: useCapabilityElevenLabs(),
|
||||
textToImage: useCapabilityTextToImage(),
|
||||
},
|
||||
models: getLLMsDebugInfo(),
|
||||
|
||||
@@ -8,11 +8,13 @@ import CloseRoundedIcon from '@mui/icons-material/CloseRounded';
|
||||
import MicIcon from '@mui/icons-material/Mic';
|
||||
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
|
||||
|
||||
import { useSpeexGlobalEngine } from '~/modules/speex/store-module-speex';
|
||||
|
||||
import { PhVoice } from '~/common/components/icons/phosphor/PhVoice';
|
||||
import { animationColorRainbow } from '~/common/util/animUtils';
|
||||
import { navigateBack } from '~/common/app.routes';
|
||||
import { optimaOpenPreferences } from '~/common/layout/optima/useOptima';
|
||||
import { useCapabilityBrowserSpeechRecognition, useCapabilityElevenLabs } from '~/common/components/useCapabilities';
|
||||
import { useCapabilityBrowserSpeechRecognition } from '~/common/components/useCapabilities';
|
||||
import { useChatStore } from '~/common/stores/chat/store-chats';
|
||||
import { useUICounter } from '~/common/stores/store-ui';
|
||||
|
||||
@@ -45,7 +47,7 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n
|
||||
|
||||
// external state
|
||||
const recognition = useCapabilityBrowserSpeechRecognition();
|
||||
const synthesis = useCapabilityElevenLabs();
|
||||
const speexGlobalEngine = useSpeexGlobalEngine();
|
||||
const chatIsEmpty = useChatStore(state => {
|
||||
if (!props.conversationId)
|
||||
return false;
|
||||
@@ -58,15 +60,16 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n
|
||||
const outOfTheBlue = !props.conversationId;
|
||||
const overriddenEmptyChat = chatEmptyOverride || !chatIsEmpty;
|
||||
const overriddenRecognition = recognitionOverride || recognition.mayWork;
|
||||
const allGood = overriddenEmptyChat && overriddenRecognition && synthesis.mayWork;
|
||||
const fatalGood = overriddenRecognition && synthesis.mayWork;
|
||||
const synthesisShallWork = !!speexGlobalEngine;
|
||||
const allGood = overriddenEmptyChat && overriddenRecognition && synthesisShallWork;
|
||||
const fatalGood = overriddenRecognition && synthesisShallWork;
|
||||
|
||||
|
||||
const handleOverrideChatEmpty = React.useCallback(() => setChatEmptyOverride(true), []);
|
||||
|
||||
const handleOverrideRecognition = React.useCallback(() => setRecognitionOverride(true), []);
|
||||
|
||||
const handleConfigureElevenLabs = React.useCallback(() => optimaOpenPreferences('voice'), []);
|
||||
const handleConfigureVoice = React.useCallback(() => optimaOpenPreferences('voice'), []);
|
||||
|
||||
const handleFinishButton = React.useCallback(() => {
|
||||
if (!allGood)
|
||||
@@ -130,15 +133,15 @@ export function CallWizard(props: { strict?: boolean, conversationId: string | n
|
||||
<StatusCard
|
||||
icon={<PhVoice />}
|
||||
text={
|
||||
(synthesis.mayWork ? 'Voice synthesis should be ready.' : 'There might be an issue with ElevenLabs voice synthesis.')
|
||||
+ (synthesis.isConfiguredServerSide ? '' : (synthesis.isConfiguredClientSide ? '' : ' Please add your API key in the settings.'))
|
||||
(synthesisShallWork ? 'Voice synthesis should be ready.' : 'There might be an issue with voice synthesis.')
|
||||
// + (synthesis.isConfiguredServerSide ? '' : (synthesis.isConfiguredClientSide ? '' : ' Please add your API key in the settings.'))
|
||||
}
|
||||
button={synthesis.mayWork ? undefined : (
|
||||
<Button variant='outlined' onClick={handleConfigureElevenLabs} sx={{ mx: 1 }}>
|
||||
button={synthesisShallWork ? undefined : (
|
||||
<Button variant='outlined' onClick={handleConfigureVoice} sx={{ mx: 1 }}>
|
||||
Configure
|
||||
</Button>
|
||||
)}
|
||||
hasIssue={!synthesis.mayWork}
|
||||
hasIssue={!synthesisShallWork}
|
||||
/>
|
||||
|
||||
{/*<Typography>*/}
|
||||
|
||||
@@ -317,7 +317,7 @@ export function Contacts(props: { setCallIntent: (intent: AppCallIntent) => void
|
||||
issue={354}
|
||||
text='Call App: Support thread and compatibility matrix'
|
||||
note={<>
|
||||
Voice input uses the HTML Web Speech API, and speech output requires an ElevenLabs API Key.
|
||||
Voice input uses the HTML Web Speech API.
|
||||
</>}
|
||||
// note2='Please report any issues you encounter'
|
||||
sx={{
|
||||
|
||||
+16
-29
@@ -13,9 +13,9 @@ import { ScrollToBottomButton } from '~/common/scroll-to-bottom/ScrollToBottomBu
|
||||
import { useChatLLMDropdown } from '../chat/components/layout-bar/useLLMDropdown';
|
||||
|
||||
import { SystemPurposeId, SystemPurposes } from '../../data';
|
||||
import { elevenLabsSpeakText } from '~/modules/elevenlabs/elevenlabs.client';
|
||||
import { AixChatGenerateContent_DMessageGuts, aixChatGenerateContent_DMessage_FromConversation } from '~/modules/aix/client/aix.client';
|
||||
import { useElevenLabsVoiceDropdown } from '~/modules/elevenlabs/useElevenLabsVoiceDropdown';
|
||||
|
||||
import { aixChatGenerateContent_DMessage_FromConversation, AixChatGenerateContent_DMessageGuts } from '~/modules/aix/client/aix.client';
|
||||
import { speakText } from '~/modules/speex/speex.client';
|
||||
|
||||
import type { OptimaBarControlMethods } from '~/common/layout/optima/bar/OptimaBarDropdown';
|
||||
import { AudioPlayer } from '~/common/util/audio/AudioPlayer';
|
||||
@@ -43,18 +43,13 @@ import { useAppCallStore } from './state/store-app-call';
|
||||
function CallMenu(props: {
|
||||
pushToTalk: boolean,
|
||||
setPushToTalk: (pushToTalk: boolean) => void,
|
||||
override: boolean,
|
||||
setOverride: (overridePersonaVoice: boolean) => void,
|
||||
}) {
|
||||
|
||||
// external state
|
||||
const { grayUI, toggleGrayUI } = useAppCallStore();
|
||||
const { voicesDropdown } = useElevenLabsVoiceDropdown(false, !props.override);
|
||||
|
||||
const handlePushToTalkToggle = () => props.setPushToTalk(!props.pushToTalk);
|
||||
|
||||
const handleChangeVoiceToggle = () => props.setOverride(!props.override);
|
||||
|
||||
return <OptimaPanelGroupedList title='Call'>
|
||||
|
||||
<MenuItem onClick={handlePushToTalkToggle}>
|
||||
@@ -63,17 +58,6 @@ function CallMenu(props: {
|
||||
<Switch checked={props.pushToTalk} onChange={handlePushToTalkToggle} sx={{ ml: 'auto' }} />
|
||||
</MenuItem>
|
||||
|
||||
<MenuItem onClick={handleChangeVoiceToggle}>
|
||||
<ListItemDecorator><PhVoice /></ListItemDecorator>
|
||||
Change Voice
|
||||
<Switch checked={props.override} onChange={handleChangeVoiceToggle} sx={{ ml: 'auto' }} />
|
||||
</MenuItem>
|
||||
|
||||
<MenuItem>
|
||||
<ListItemDecorator>{' '}</ListItemDecorator>
|
||||
{voicesDropdown}
|
||||
</MenuItem>
|
||||
|
||||
<ListDivider />
|
||||
|
||||
<MenuItem onClick={toggleGrayUI}>
|
||||
@@ -98,7 +82,6 @@ export function Telephone(props: {
|
||||
const [avatarClickCount, setAvatarClickCount] = React.useState<number>(0);// const [micMuted, setMicMuted] = React.useState(false);
|
||||
const [callElapsedTime, setCallElapsedTime] = React.useState<string>('00:00');
|
||||
const [callMessages, setCallMessages] = React.useState<DMessage[]>([]);
|
||||
const [overridePersonaVoice, setOverridePersonaVoice] = React.useState<boolean>(false);
|
||||
const [personaTextInterim, setPersonaTextInterim] = React.useState<string | null>(null);
|
||||
const [pushToTalk, setPushToTalk] = React.useState(true);
|
||||
const [stage, setStage] = React.useState<'ring' | 'declined' | 'connected' | 'ended'>('ring');
|
||||
@@ -118,7 +101,7 @@ export function Telephone(props: {
|
||||
}));
|
||||
const persona = SystemPurposes[props.callIntent.personaId as SystemPurposeId] ?? undefined;
|
||||
const personaCallStarters = persona?.call?.starters ?? undefined;
|
||||
const personaVoiceId = overridePersonaVoice ? undefined : (persona?.voices?.elevenLabs?.voiceId ?? undefined);
|
||||
// const personaVoiceSelector = React.useMemo(() => personaGetVoiceSelector(persona), [persona]);
|
||||
const personaSystemMessage = persona?.systemMessage ?? undefined;
|
||||
|
||||
// hooks and speech
|
||||
@@ -165,7 +148,6 @@ export function Telephone(props: {
|
||||
};
|
||||
|
||||
// [E] pickup -> seed message and call timer
|
||||
// FIXME: Overriding the voice will reset the call - not a desired behavior
|
||||
React.useEffect(() => {
|
||||
if (!isConnected) return;
|
||||
|
||||
@@ -185,11 +167,14 @@ export function Telephone(props: {
|
||||
|
||||
setCallMessages([createDMessageTextContent('assistant', firstMessage)]); // [state] set assistant:hello message
|
||||
|
||||
// fire/forget
|
||||
void elevenLabsSpeakText(firstMessage, personaVoiceId, true, true);
|
||||
// fire/forget - use 'fast' priority for real-time conversation
|
||||
void speakText(firstMessage,
|
||||
undefined,
|
||||
{ label: 'Call', priority: 'fast' },
|
||||
);
|
||||
|
||||
return () => clearInterval(interval);
|
||||
}, [isConnected, personaCallStarters, personaVoiceId]);
|
||||
}, [isConnected, personaCallStarters]);
|
||||
|
||||
// [E] persona streaming response - upon new user message
|
||||
React.useEffect(() => {
|
||||
@@ -270,9 +255,12 @@ export function Telephone(props: {
|
||||
fullMessage.generator = status.lastDMessage.generator;
|
||||
setCallMessages(messages => [...messages, fullMessage]); // [state] append assistant:call_response
|
||||
|
||||
// fire/forget
|
||||
// fire/forget - use 'fast' priority for real-time conversation
|
||||
if (status.outcome === 'success' && finalText?.length >= 1)
|
||||
void elevenLabsSpeakText(finalText, personaVoiceId, true, true);
|
||||
void speakText(finalText,
|
||||
undefined,
|
||||
{ label: 'Call', priority: 'fast' },
|
||||
);
|
||||
|
||||
}).catch((err: DOMException) => {
|
||||
if (err?.name !== 'AbortError') {
|
||||
@@ -288,7 +276,7 @@ export function Telephone(props: {
|
||||
responseAbortController.current?.abort();
|
||||
responseAbortController.current = null;
|
||||
};
|
||||
}, [isConnected, callMessages, modelId, personaVoiceId, personaSystemMessage, reMessages]);
|
||||
}, [callMessages, isConnected, modelId, personaSystemMessage, reMessages]);
|
||||
|
||||
// [E] Message interrupter
|
||||
const abortTrigger = isConnected && recognitionState.hasSpeech;
|
||||
@@ -325,7 +313,6 @@ export function Telephone(props: {
|
||||
<OptimaPanelIn>
|
||||
<CallMenu
|
||||
pushToTalk={pushToTalk} setPushToTalk={setPushToTalk}
|
||||
override={overridePersonaVoice} setOverride={setOverridePersonaVoice}
|
||||
/>
|
||||
</OptimaPanelIn>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user