Persona: port AutoSpeak

This commit is contained in:
Enrico Ros
2024-07-04 02:55:10 -07:00
parent 9e705a12b1
commit 065f30ac38
2 changed files with 70 additions and 33 deletions
+24 -33
View File
@@ -3,14 +3,15 @@ import type { VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/
import { aixStreamingChatGenerate, StreamingClientUpdate } from '~/modules/aix/client/aix.client';
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import { PersonaChatMessageSpeak } from './persona/PersonaChatMessageSpeak';
import type { DConversationId } from '~/common/stores/chat/chat.conversation';
import { ConversationsManager } from '~/common/chats/ConversationsManager';
import { DMessage, messageFragmentsReduceText, messageFragmentsReplaceLastContentText, messageSingleTextOrThrow } from '~/common/stores/chat/chat.message';
import { DMessage, messageFragmentsReplaceLastContentText, messageSingleTextOrThrow } from '~/common/stores/chat/chat.message';
import { getUXLabsHighPerformance } from '~/common/state/store-ux-labs';
import { isContentFragment, isTextPart } from '~/common/stores/chat/chat.fragments';
import { ChatAutoSpeakType, getChatAutoAI } from '../store-app-chat';
import { getChatAutoAI } from '../store-app-chat';
import { getInstantAppChatPanesCount } from '../components/panes/usePanesManager';
@@ -37,6 +38,9 @@ export async function runPersonaOnConversationHead(
{ originLLM: assistantLlmId, purposeId: history[0].purposeId },
);
// AutoSpeak
const autoSpeaker = autoSpeak !== 'off' ? new PersonaChatMessageSpeak(autoSpeak) : null;
// when an abort controller is set, the UI switches to the "stop" mode
const abortController = new AbortController();
cHandler.setAbortController(abortController);
@@ -55,13 +59,24 @@ export async function runPersonaOnConversationHead(
'conversation',
conversationId,
parallelViewCount,
autoSpeak,
(accumulatedMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => {
cHandler.messageEdit(assistantMessageId, accumulatedMessage, messageComplete, false);
},
abortController.signal,
(accumulatedMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => {
if (abortController.signal.aborted) return;
cHandler.messageEdit(assistantMessageId, accumulatedMessage, messageComplete, false);
if (autoSpeaker && accumulatedMessage.fragments?.length && isContentFragment(accumulatedMessage.fragments[0]) && isTextPart(accumulatedMessage.fragments[0].part)) {
if (messageComplete)
autoSpeaker.finalizeText(accumulatedMessage.fragments[0].part.text);
else
autoSpeaker.handleTextSoFar(accumulatedMessage.fragments[0].part.text);
}
},
);
// check if aborted
const hasBeenAborted = abortController.signal.aborted;
// clear to send, again
// FIXME: race condition? (for sure!)
cHandler.setAbortController(null);
@@ -71,7 +86,7 @@ export async function runPersonaOnConversationHead(
void autoConversationTitle(conversationId, false);
}
if (autoSuggestDiagrams || autoSuggestHTMLUI || autoSuggestQuestions)
if (!hasBeenAborted && (autoSuggestDiagrams || autoSuggestHTMLUI || autoSuggestQuestions))
autoSuggestions(null, conversationId, assistantMessageId, autoSuggestDiagrams, autoSuggestHTMLUI, autoSuggestQuestions);
return messageStatus.outcome === 'success';
@@ -88,9 +103,8 @@ export async function llmGenerateContentStream(
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
autoSpeak: ChatAutoSpeakType,
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
abortSignal: AbortSignal,
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
): Promise<StreamMessageStatus> {
const returnStatus: StreamMessageStatus = {
@@ -98,9 +112,6 @@ export async function llmGenerateContentStream(
errorMessage: undefined,
};
// speak once
let spokenLine = false;
// Throttling setup
let lastCallTime = 0;
let throttleDelay = 1000 / 12; // 12 messages per second works well for 60Hz displays (single chat, and 24 in 4 chats, see the square root below)
@@ -137,19 +148,6 @@ export async function llmGenerateContentStream(
// Update the data store, with optional max-frequency throttling (e.g. OpenAI is downsamped 50 -> 12Hz)
// This can be toggled from the settings
throttledEditMessage(incrementalAnswer);
// 📢 TTS: first-line
if (textSoFar && autoSpeak === 'firstLine' && !spokenLine) {
let cutPoint = textSoFar.lastIndexOf('\n');
if (cutPoint < 0)
cutPoint = textSoFar.lastIndexOf('. ');
if (cutPoint > 100 && cutPoint < 400) {
spokenLine = true;
const firstParagraph = textSoFar.substring(0, cutPoint);
// fire/forget: we don't want to stall this loop
void speakText(firstParagraph);
}
}
};
await aixStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, onUpdate);
@@ -168,12 +166,5 @@ export async function llmGenerateContentStream(
// Ensure the last content is flushed out, and mark as complete
onMessageUpdated({ ...incrementalAnswer, pendingIncomplete: undefined }, true);
// 📢 TTS: all
if ((autoSpeak === 'all' || autoSpeak === 'firstLine') && !spokenLine && !abortSignal.aborted) {
const incrementalText = messageFragmentsReduceText(incrementalAnswer.fragments);
if (incrementalText.length > 0)
void speakText(incrementalText);
}
return returnStatus;
}
@@ -0,0 +1,46 @@
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
export type AutoSpeakType = 'off' | 'firstLine' | 'all';
/**
 * Incrementally speaks assistant chat messages via TTS (ElevenLabs `speakText`).
 *
 * Modes:
 * - 'firstLine': speaks the first paragraph/sentence as soon as a reasonable
 *   cut point (between 100 and 400 chars) appears in the streaming text, then stops.
 * - 'all': speaks the full message once, when finalized.
 * - 'off': never speaks (callers typically don't construct an instance at all in this case).
 *
 * At most one utterance is produced per instance (per message).
 */
export class PersonaChatMessageSpeak {

  // guards against speaking more than once per message
  private spokenLine: boolean = false;

  constructor(private readonly autoSpeakType: AutoSpeakType) {
  }

  /**
   * Feed the accumulated text while the message is still streaming.
   * Only acts in 'firstLine' mode: speaks the first paragraph once a cut
   * point lands in the (100, 400) character window, then disarms itself.
   */
  handleTextSoFar(textSoFar: string): void {
    // 📢 TTS: first-line — only mode that acts mid-stream
    if (this.spokenLine || this.autoSpeakType !== 'firstLine') return;

    const cutPoint = this.findLastCutPoint(textSoFar);
    if (cutPoint > 100 && cutPoint < 400) {
      this.spokenLine = true;
      this.speak(textSoFar.substring(0, cutPoint));
    }
  }

  /**
   * Feed the complete message text once streaming has finished.
   * Speaks the whole text unless speaking is off, the text is empty,
   * or the first line was already spoken mid-stream.
   */
  finalizeText(fullText: string): void {
    // 📢 TTS: all (also the 'firstLine' fallback when no cut point was found)
    if (!this.spokenLine && this.autoSpeakType !== 'off' && fullText.length > 0)
      this.speak(fullText);
  }

  /** Index of the last paragraph break, falling back to the last sentence break; -1 if neither exists. */
  private findLastCutPoint(text: string): number {
    const newlineIdx = text.lastIndexOf('\n');
    return newlineIdx >= 0 ? newlineIdx : text.lastIndexOf('. ');
  }

  private speak(text: string) {
    // fire/forget: we don't want to stall the streaming loop
    void speakText(text);
  }

}