Persona: port AutoSpeak

This commit is contained in:
Enrico Ros
2024-07-04 02:55:10 -07:00
parent 9e705a12b1
commit 065f30ac38
2 changed files with 70 additions and 33 deletions
+24 -33
View File
@@ -3,14 +3,15 @@ import type { VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/
import { aixStreamingChatGenerate, StreamingClientUpdate } from '~/modules/aix/client/aix.client';
import { autoConversationTitle } from '~/modules/aifn/autotitle/autoTitle';
import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
import { PersonaChatMessageSpeak } from './persona/PersonaChatMessageSpeak';
import type { DConversationId } from '~/common/stores/chat/chat.conversation';
import { ConversationsManager } from '~/common/chats/ConversationsManager';
import { DMessage, messageFragmentsReduceText, messageFragmentsReplaceLastContentText, messageSingleTextOrThrow } from '~/common/stores/chat/chat.message';
import { DMessage, messageFragmentsReplaceLastContentText, messageSingleTextOrThrow } from '~/common/stores/chat/chat.message';
import { getUXLabsHighPerformance } from '~/common/state/store-ux-labs';
import { isContentFragment, isTextPart } from '~/common/stores/chat/chat.fragments';
import { ChatAutoSpeakType, getChatAutoAI } from '../store-app-chat';
import { getChatAutoAI } from '../store-app-chat';
import { getInstantAppChatPanesCount } from '../components/panes/usePanesManager';
@@ -37,6 +38,9 @@ export async function runPersonaOnConversationHead(
{ originLLM: assistantLlmId, purposeId: history[0].purposeId },
);
// AutoSpeak
const autoSpeaker = autoSpeak !== 'off' ? new PersonaChatMessageSpeak(autoSpeak) : null;
// when an abort controller is set, the UI switches to the "stop" mode
const abortController = new AbortController();
cHandler.setAbortController(abortController);
@@ -55,13 +59,24 @@ export async function runPersonaOnConversationHead(
'conversation',
conversationId,
parallelViewCount,
autoSpeak,
(accumulatedMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => {
cHandler.messageEdit(assistantMessageId, accumulatedMessage, messageComplete, false);
},
abortController.signal,
(accumulatedMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => {
if (abortController.signal.aborted) return;
cHandler.messageEdit(assistantMessageId, accumulatedMessage, messageComplete, false);
if (autoSpeaker && accumulatedMessage.fragments?.length && isContentFragment(accumulatedMessage.fragments[0]) && isTextPart(accumulatedMessage.fragments[0].part)) {
if (messageComplete)
autoSpeaker.finalizeText(accumulatedMessage.fragments[0].part.text);
else
autoSpeaker.handleTextSoFar(accumulatedMessage.fragments[0].part.text);
}
},
);
// check if aborted
const hasBeenAborted = abortController.signal.aborted;
// clear to send, again
// FIXME: race condition? (for sure!)
cHandler.setAbortController(null);
@@ -71,7 +86,7 @@ export async function runPersonaOnConversationHead(
void autoConversationTitle(conversationId, false);
}
if (autoSuggestDiagrams || autoSuggestHTMLUI || autoSuggestQuestions)
if (!hasBeenAborted && (autoSuggestDiagrams || autoSuggestHTMLUI || autoSuggestQuestions))
autoSuggestions(null, conversationId, assistantMessageId, autoSuggestDiagrams, autoSuggestHTMLUI, autoSuggestQuestions);
return messageStatus.outcome === 'success';
@@ -88,9 +103,8 @@ export async function llmGenerateContentStream(
contextName: VChatStreamContextName,
contextRef: VChatContextRef,
throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
autoSpeak: ChatAutoSpeakType,
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
abortSignal: AbortSignal,
onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
): Promise<StreamMessageStatus> {
const returnStatus: StreamMessageStatus = {
@@ -98,9 +112,6 @@ export async function llmGenerateContentStream(
errorMessage: undefined,
};
// speak once
let spokenLine = false;
// Throttling setup
let lastCallTime = 0;
let throttleDelay = 1000 / 12; // 12 messages per second works well for 60Hz displays (single chat, and 24 in 4 chats, see the square root below)
@@ -137,19 +148,6 @@ export async function llmGenerateContentStream(
// Update the data store, with optional max-frequency throttling (e.g. OpenAI is downsamped 50 -> 12Hz)
// This can be toggled from the settings
throttledEditMessage(incrementalAnswer);
// 📢 TTS: first-line
if (textSoFar && autoSpeak === 'firstLine' && !spokenLine) {
let cutPoint = textSoFar.lastIndexOf('\n');
if (cutPoint < 0)
cutPoint = textSoFar.lastIndexOf('. ');
if (cutPoint > 100 && cutPoint < 400) {
spokenLine = true;
const firstParagraph = textSoFar.substring(0, cutPoint);
// fire/forget: we don't want to stall this loop
void speakText(firstParagraph);
}
}
};
await aixStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, onUpdate);
@@ -168,12 +166,5 @@ export async function llmGenerateContentStream(
// Ensure the last content is flushed out, and mark as complete
onMessageUpdated({ ...incrementalAnswer, pendingIncomplete: undefined }, true);
// 📢 TTS: all
if ((autoSpeak === 'all' || autoSpeak === 'firstLine') && !spokenLine && !abortSignal.aborted) {
const incrementalText = messageFragmentsReduceText(incrementalAnswer.fragments);
if (incrementalText.length > 0)
void speakText(incrementalText);
}
return returnStatus;
}
@@ -0,0 +1,46 @@
import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
export type AutoSpeakType = 'off' | 'firstLine' | 'all';
/**
 * Incrementally speaks assistant chat messages via TTS (ElevenLabs `speakText`).
 *
 * Modes:
 * - 'firstLine': speaks the first paragraph/sentence as soon as a reasonable
 *   cut point (between 100 and 400 chars) appears in the streaming text, then stops.
 * - 'all': speaks the full message once, when finalized.
 * - 'off': never speaks (callers typically don't construct an instance at all in this case).
 *
 * At most one utterance is produced per instance (per message).
 */
export class PersonaChatMessageSpeak {

  // guards against speaking more than once per message
  private spokenLine: boolean = false;

  constructor(private readonly autoSpeakType: AutoSpeakType) {
  }

  /**
   * Feed the accumulated text while the message is still streaming.
   * Only acts in 'firstLine' mode: speaks the first paragraph once a cut
   * point lands in the (100, 400) character window, then disarms itself.
   */
  handleTextSoFar(textSoFar: string): void {
    // 📢 TTS: first-line — only mode that acts mid-stream
    if (this.spokenLine || this.autoSpeakType !== 'firstLine') return;

    const cutPoint = this.findLastCutPoint(textSoFar);
    if (cutPoint > 100 && cutPoint < 400) {
      this.spokenLine = true;
      this.speak(textSoFar.substring(0, cutPoint));
    }
  }

  /**
   * Feed the complete message text once streaming has finished.
   * Speaks the whole text unless speaking is off, the text is empty,
   * or the first line was already spoken mid-stream.
   */
  finalizeText(fullText: string): void {
    // 📢 TTS: all (also the 'firstLine' fallback when no cut point was found)
    if (!this.spokenLine && this.autoSpeakType !== 'off' && fullText.length > 0)
      this.speak(fullText);
  }

  /** Index of the last paragraph break, falling back to the last sentence break; -1 if neither exists. */
  private findLastCutPoint(text: string): number {
    const newlineIdx = text.lastIndexOf('\n');
    return newlineIdx >= 0 ? newlineIdx : text.lastIndexOf('. ');
  }

  private speak(text: string) {
    // fire/forget: we don't want to stall the streaming loop
    void speakText(text);
  }

}