From c42625c8aa7c43fe6bb8a09cedcf5f20e286bbfa Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Sat, 18 Nov 2023 16:26:45 -0800 Subject: [PATCH] SpeechRecognition: add done 'reason' --- src/common/components/useSpeechRecognition.ts | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/common/components/useSpeechRecognition.ts b/src/common/components/useSpeechRecognition.ts index c2d238887..6c6200be6 100644 --- a/src/common/components/useSpeechRecognition.ts +++ b/src/common/components/useSpeechRecognition.ts @@ -7,10 +7,19 @@ import { useGlobalShortcut } from './useGlobalShortcut'; import { useUIPreferencesStore } from '../state/store-ui'; +type DoneReason = + undefined // upon start: not done yet + | 'manual' // user clicked the stop button + | 'continuous-deadline' // we hit our `softStopTimeout` while listening continuously + | 'api-unknown-timeout' // a timeout has occurred + | 'api-error' // underlying .onerror + | 'api-no-speech'; // underlying .onerror, user did not speak + export interface SpeechResult { transcript: string; // the portion of the transcript that is finalized (or all the transcript if done) interimTranscript: string; // for the continuous (interim) listening, this is the current transcript done: boolean; // true if the recognition is done - no more updates after this + doneReason: DoneReason; // the reason why the recognition is done } let cachedCapability: CapabilityBrowserSpeechRecognition | null = null; @@ -38,7 +47,7 @@ export const browserSpeechRecognitionCapability = (): CapabilityBrowserSpeechRec */ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => void, softStopTimeout: number, useShortcutCtrlKey: string | false) => { // enablers - const refRecognition = React.useRef(null); + const refRecognition = React.useRef(null); const onResultCallbackRef = React.useRef(onResultCallback); // session @@ -63,7 +72,7 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => // do not re-initialize, just update the language (if we're here there's a high chance the language has changed) if (refRecognition.current) { - refRecognition.current.lang = preferredLanguage; + refRecognition.current.setLang(preferredLanguage); return; } @@ -82,6 +91,7 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => transcript: '', interimTranscript: '', done: false, + doneReason: undefined, }; const instance = new webSpeechAPI(); @@ -100,10 +110,11 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => } }; - const reloadInactivityTimeout = (timeoutMs: number) => { + const reloadInactivityTimeout = (timeoutMs: number, doneReason: DoneReason) => { clearInactivityTimeout(); inactivityTimeoutId = setTimeout(() => { inactivityTimeoutId = null; + speechResult.doneReason = doneReason; instance.stop(); }, timeoutMs); }; @@ -122,6 +133,7 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => speechResult.transcript = ''; speechResult.interimTranscript = 'Listening...'; speechResult.done = false; + speechResult.doneReason = undefined; onResultCallbackRef.current(speechResult); // let the system handle the first stop (as long as possible) // if (instance.interimResults) @@ -134,13 +146,17 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => clearInactivityTimeout(); speechResult.interimTranscript = ''; speechResult.done = true; + speechResult.doneReason = speechResult.doneReason ?? 'api-unknown-timeout'; onResultCallbackRef.current(speechResult); }; instance.onerror = event => { - if (event.error !== 'no-speech') { + if (event.error === 'no-speech') { + speechResult.doneReason = 'api-no-speech'; + } else { console.error('Error occurred during speech recognition:', event.error); setIsSpeechError(true); + speechResult.doneReason = 'api-error'; } }; @@ -181,11 +197,18 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => // auto-stop if (instance.interimResults) - reloadInactivityTimeout(softStopTimeout); + reloadInactivityTimeout(softStopTimeout, 'continuous-deadline'); }; - // save the instance - refRecognition.current = instance; + // store the control interface + refRecognition.current = { + setLang: (lang: string) => instance.lang = lang, + start: () => instance.start(), + stop: (reason: DoneReason) => { + speechResult.doneReason = reason; + instance.stop(); + }, + }; refStarted.current = false; setIsSpeechEnabled(true); @@ -215,7 +238,7 @@ export const useSpeechRecognition = (onResultCallback: (result: SpeechResult) => if (!refStarted.current) return console.error('stopRecording: Stop recording called while not recording.'); - refRecognition.current.stop(); + refRecognition.current.stop('manual'); }, []); const toggleRecording = React.useCallback(() => { @@ -281,4 +304,10 @@ interface ISpeechRecognition extends EventTarget { interface ISpeechRecognitionEvent extends Event { // readonly resultIndex: number; readonly results: SpeechRecognitionResult[]; +} + +interface SpeechRecoControls { + setLang: (lang: string) => void; + start: () => void; + stop: (reason: DoneReason) => void; } \ No newline at end of file