From 42e97eed4c94748537c15b2f22a4d130efcccb2b Mon Sep 17 00:00:00 2001
From: Enrico Ros <enrico.ros@gmail.com>
Date: Thu, 4 Jul 2024 03:18:07 -0700
Subject: [PATCH] Persona: port Throttle

---
 src/apps/chat/editors/chat-persona.ts | 82 +++++++++++++++------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/src/apps/chat/editors/chat-persona.ts b/src/apps/chat/editors/chat-persona.ts
index 230277fa0..268b452f6 100644
--- a/src/apps/chat/editors/chat-persona.ts
+++ b/src/apps/chat/editors/chat-persona.ts
@@ -102,55 +102,37 @@ export async function llmGenerateContentStream(
   messagesHistory: VChatMessageIn[],
   contextName: VChatStreamContextName,
   contextRef: VChatContextRef,
-  throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
+  parallelViewCount: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce frequency with the square root
   abortSignal: AbortSignal,
   onMessageUpdated: (incrementalMessage: Partial<StreamMessageUpdate>, messageComplete: boolean) => void,
 ): Promise<StreamMessageStatus> {
 
-  const returnStatus: StreamMessageStatus = {
-    outcome: 'success',
-    errorMessage: undefined,
-  };
+  const returnStatus: StreamMessageStatus = { outcome: 'success', errorMessage: undefined };
 
-  // Throttling setup
-  let lastCallTime = 0;
-  let throttleDelay = 1000 / 12; // 12 messages per second works well for 60Hz displays (single chat, and 24 in 4 chats, see the square root below)
-  if (throttleUnits > 1)
-    throttleDelay = Math.round(throttleDelay * Math.sqrt(throttleUnits));
-
-  function throttledEditMessage(updatedMessage: Partial<StreamMessageUpdate>) {
-    const now = Date.now();
-    if (throttleUnits === 0 || now - lastCallTime >= throttleDelay) {
-      onMessageUpdated(updatedMessage, false);
-      lastCallTime = now;
-    }
-  }
+  const throttler = new ThrottleFunctionCall(parallelViewCount);
 
   // TODO: should clean this up once we have multi-fragment streaming/recombination
   const incrementalAnswer: StreamMessageUpdate = {
     fragments: [],
   };
 
-  console.log('PERSONA HERE');
-
   try {
 
-    const onUpdate = (update: StreamingClientUpdate, done: boolean) => {
-      // console.log('PERSONA UPDATE', update, done);
-      const textSoFar = update.textSoFar;
+    await aixStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal,
+      (update: StreamingClientUpdate, done: boolean) => {
 
-      // grow the incremental message
-      if (textSoFar) incrementalAnswer.fragments = messageFragmentsReplaceLastContentText(incrementalAnswer.fragments, textSoFar);
-      if (update.originLLM) incrementalAnswer.originLLM = update.originLLM;
-      if (update.typing !== undefined)
-        incrementalAnswer.pendingIncomplete = update.typing ? true : undefined;
+        // grow the incremental message
+        if (update.textSoFar) incrementalAnswer.fragments = messageFragmentsReplaceLastContentText(incrementalAnswer.fragments, update.textSoFar);
+        if (update.originLLM) incrementalAnswer.originLLM = update.originLLM;
+        if (update.typing !== undefined)
+          incrementalAnswer.pendingIncomplete = update.typing ? true : undefined;
 
-      // Update the data store, with optional max-frequency throttling (e.g. OpenAI is downsamped 50 -> 12Hz)
-      // This can be toggled from the settings
-      throttledEditMessage(incrementalAnswer);
-    };
-
-    await aixStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, onUpdate);
+        // throttle the update
+        throttler.handleUpdate(() => {
+          onMessageUpdated(incrementalAnswer, false);
+        });
+      },
+    );
 
   } catch (error: any) {
     if (error?.name !== 'AbortError') {
@@ -164,7 +146,35 @@ export async function llmGenerateContentStream(
   }
 
   // Ensure the last content is flushed out, and mark as complete
-  onMessageUpdated({ ...incrementalAnswer, pendingIncomplete: undefined }, true);
+  throttler.finalize(() => {
+    onMessageUpdated({ ...incrementalAnswer, pendingIncomplete: undefined }, true);
+  });
 
   return returnStatus;
-}
\ No newline at end of file
+}
+
+
+export class ThrottleFunctionCall {
+  private readonly throttleDelay: number;
+  private lastCallTime: number = 0;
+
+  constructor(throttleUnits: number) {
+    // 12 messages per second works well for 60Hz displays (single chat, and 24 in 4 chats, see the square root below)
+    const baseDelayMs = 1000 / 12;
+    this.throttleDelay = throttleUnits === 0 ? 0
+      : throttleUnits > 1 ? Math.round(baseDelayMs * Math.sqrt(throttleUnits))
+        : baseDelayMs;
+  }
+
+  handleUpdate(fn: () => void): void {
+    const now = Date.now();
+    if (this.throttleDelay === 0 || this.lastCallTime === 0 || now - this.lastCallTime >= this.throttleDelay) {
+      fn();
+      this.lastCallTime = now;
+    }
+  }
+
+  finalize(fn: () => void): void {
+    fn(); // Always execute the final update
+  }
+}