Support for ElevenLabs (disabled by default)

2026-05-10 21:50:14 -07:00 · 2023-04-12 22:50:11 -07:00
parent b7a27ec8cc
commit 876cf93527
5 changed files with 143 additions and 1 deletions
@@ -5,4 +5,9 @@ OPENAI_API_KEY=
 OPENAI_API_HOST=

 # [Not needed] Sets the "OpenAI-Organization" header field to support organization users (UI > this > '')
-OPENAI_API_ORG_ID=
+OPENAI_API_ORG_ID=
+
+# [Optional] Sets the API Key, Host and Voice ID for ElevenLabs (used by the optional text-to-speech feature)
+ELEVENLABS_API_KEY=
+ELEVENLABS_API_HOST=
+ELEVENLABS_VOICE_ID=
@@ -13,6 +13,7 @@ import { ConfirmationModal } from '@/components/dialogs/ConfirmationModal';
 import { Link } from '@/components/util/Link';
 import { PublishedModal } from '@/components/dialogs/PublishedModal';
 import { createDMessage, DMessage, downloadConversationJson, useChatStore } from '@/lib/store-chats';
+// import { playLastMessage } from '@/lib/text-to-speech';
 import { publishConversation } from '@/lib/publish';
 import { streamAssistantMessage, updateAutoConversationTitle } from '@/lib/ai';
 import { useSettingsStore } from '@/lib/store-settings';
@@ -62,6 +63,9 @@ const runAssistantUpdatingState = async (conversationId: string, history: DMessa
  // clear to send, again
  startTyping(conversationId, null);

+  // play the first paragraph
+  // playLastMessage(conversationId).then(() => null);
+
  // update text, if needed
  await updateAutoConversationTitle(conversationId);
 };
@@ -0,0 +1,56 @@
+import { ApiElevenLabsSpeechBody } from '../pages/api/elevenlabs/speech';
+
+import { useChatStore } from '@/lib/store-chats';
+
+
+/**
+ * Speaks the beginning of the last message of a conversation.
+ *
+ * The spoken text starts at the first non-empty paragraph of the message; further
+ * paragraphs are appended until the text is at least 100 characters long (or the
+ * message ends). A first paragraph longer than 100 characters is spoken in full.
+ *
+ * Does nothing if the conversation is not found, has no messages, or the last message
+ * has no text. Errors (network, decoding, playback) are logged and never thrown.
+ *
+ * @param conversationId id of the conversation whose last message is played
+ */
+export async function playLastMessage(conversationId: string) {
+
+  const messages = useChatStore.getState().conversations.find(conversation => conversation.id === conversationId)?.messages;
+  if (!messages?.length) return;
+
+  // grab the first paragraph of the last message, extended with the following ones while it's shorter than 100 characters
+  let text = '';
+  const paragraphs = messages[messages.length - 1].text.split('\n');
+  for (const paragraph of paragraphs) {
+    // long enough: stop *before* appending more, so that a long first paragraph is never dropped
+    if (text.length >= 100)
+      break;
+    const trimmed = paragraph.trim();
+    if (!trimmed)
+      continue;
+    text += (text.length > 0 ? '\n' : '') + trimmed;
+  }
+  if (!text) return;
+
+  try {
+    const audioBuffer = await convertTextToSpeech(text);
+    const audioContext = new AudioContext();
+    // every call creates its own context: release it when playback ends (or fails), as browsers cap the number of live contexts
+    const releaseContext = () => {
+      audioContext.close().catch(() => null);
+    };
+    try {
+      const bufferSource = audioContext.createBufferSource();
+      bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
+      bufferSource.connect(audioContext.destination);
+      bufferSource.onended = releaseContext;
+      bufferSource.start();
+    } catch (error) {
+      releaseContext();
+      throw error;
+    }
+  } catch (error) {
+    console.error('Error playing first text:', error);
+  }
+
+}
+
+
+/**
+ * Asks the local `/api/elevenlabs/speech` route to synthesize `text`.
+ *
+ * @param text the text to be spoken
+ * @returns the raw bytes of the response body (the encoded audio)
+ * @throws Error carrying the message reported by the route, or a generic message with
+ *         the HTTP status when the error body is missing, not JSON, or has no usable message
+ */
+async function convertTextToSpeech(text: string): Promise<ArrayBuffer> {
+  const payload: ApiElevenLabsSpeechBody = {
+    text,
+  };
+
+  const response = await fetch('/api/elevenlabs/speech', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(payload),
+  });
+
+  if (!response.ok) {
+    // the error body may be empty or not JSON (e.g. an HTML error page): parsing must not mask the real failure
+    let reason: unknown;
+    try {
+      const errorData = await response.json();
+      reason = errorData?.error || errorData?.message;
+    } catch {
+      reason = undefined;
+    }
+    // the reason may be a non-string (e.g. a serialized Error becomes `{}`): avoid '[object Object]' messages
+    let message = '';
+    if (typeof reason === 'string')
+      message = reason;
+    else if (reason)
+      message = JSON.stringify(reason);
+    if (!message || message === '{}')
+      message = `Unknown error (HTTP ${response.status})`;
+    throw new Error(message);
+  }
+
+  return await response.arrayBuffer();
+}
@@ -0,0 +1,61 @@
+// noinspection ExceptionCaughtLocallyJS
+
+import { NextRequest, NextResponse } from 'next/server';
+
+import { ApiPublishResponse } from '../publish';
+import { ElevenLabs } from '@/types/api-elevenlabs';
+
+
+/**
+ * POSTs a JSON body to an ElevenLabs API path and returns the successful response.
+ *
+ * Host and key are taken from `configuration` first, then from the `ELEVENLABS_API_HOST` /
+ * `ELEVENLABS_API_KEY` environment variables; the host defaults to `api.elevenlabs.io`.
+ * The environment key is only used when the request goes to the server-configured host.
+ *
+ * @param configuration per-request overrides for the API key and host
+ * @param apiPath path appended to the host, expected to start with `/`
+ * @param body object serialized as the JSON request body
+ * @param signal optional signal to abort the request
+ * @returns the upstream response, only when its status is ok
+ * @throws Error with the upstream status and error body when the status is not ok
+ */
+async function postToElevenLabs<TBody extends object>(configuration: ElevenLabs.API.Configuration, apiPath: string, body: TBody, signal?: AbortSignal): Promise<Response> {
+
+  // accept hosts with or without the scheme, and without trailing slashes (apiPath brings its own)
+  const normalizeHost = (host: string) => host.trim().replace(/^https:\/\//i, '').replace(/\/+$/, '');
+  const serverHost = normalizeHost(process.env.ELEVENLABS_API_HOST || 'api.elevenlabs.io');
+  const apiHost = normalizeHost(configuration.apiHost || '') || serverHost;
+
+  // SECURITY: the configuration may come from the request body - never send the server's secret key to a caller-chosen host
+  const callerKey = (configuration.apiKey || '').trim();
+  const serverKey = apiHost === serverHost ? (process.env.ELEVENLABS_API_KEY || '').trim() : '';
+  const apiHeaders: HeadersInit = {
+    'Content-Type': 'application/json',
+    'xi-api-key': callerKey || serverKey,
+  };
+
+  const response = await fetch(`https://${apiHost}${apiPath}`, {
+    method: 'POST',
+    headers: apiHeaders,
+    body: JSON.stringify(body),
+    signal,
+  });
+
+  if (!response.ok) {
+    // the error body is not guaranteed to be JSON: read it as text and parse opportunistically
+    const errorText = (await response.text().catch(() => '')) || response.statusText;
+    let errorData: unknown = errorText;
+    try {
+      errorData = JSON.parse(errorText);
+    } catch {
+      // not JSON: keep the raw text
+    }
+    console.error('Error in ElevenLabs API:', errorData);
+    throw new Error(`ElevenLabs error: [${response.status}] ` + JSON.stringify(errorData));
+  }
+
+  return response;
+}
+
+
+/**
+ * JSON body accepted by the speech API route in this file.
+ */
+export interface ApiElevenLabsSpeechBody {
+  /** Optional API key/host overrides; the route defaults this to an empty configuration. */
+  api?: ElevenLabs.API.Configuration;
+  /** The text to convert to speech; the route rejects a missing or empty value. */
+  text: string;
+  /** Optional voice; when omitted the route uses `ELEVENLABS_VOICE_ID`, then a built-in default. */
+  voiceId?: string;
+}
+
+
+/**
+ * API route: converts the `text` of the JSON request body to speech through ElevenLabs.
+ *
+ * The voice is taken from the body's `voiceId`, then from `ELEVENLABS_VOICE_ID`, then
+ * from a built-in default.
+ *
+ * @param req request whose JSON body follows `ApiElevenLabsSpeechBody`
+ * @returns status 200 with the audio bytes (`audio/mpeg`) on success; status 500 with a JSON
+ *          `{ type: 'error', error: <message> }` body on any failure, including a malformed
+ *          request body or missing text
+ */
+export default async function handler(req: NextRequest) {
+  try {
+    // parse inside the try block: a malformed or non-object body is reported as an error response, not an unhandled exception
+    const { api = {}, text, voiceId: userVoiceId } = (await req.json()) as ApiElevenLabsSpeechBody;
+    if (!text) throw new Error('Missing text');
+    const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
+    // the voice id may come from the request: encode it so it cannot alter the upstream path
+    const response = await postToElevenLabs<ElevenLabs.API.TextToSpeech.Request>(api, `/v1/text-to-speech/${encodeURIComponent(voiceId)}`, { text });
+    const audioBuffer = await response.arrayBuffer();
+    return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
+  } catch (error: unknown) {
+    console.error('Error posting to ElevenLabs', error);
+    // Error instances serialize to '{}' with JSON.stringify: send the message instead
+    let message = '';
+    if (error instanceof Error)
+      message = error.message;
+    else if (typeof error === 'string')
+      message = error;
+    return new NextResponse(JSON.stringify({
+      type: 'error',
+      error: message || 'Network issue',
+    } as ApiPublishResponse), { status: 500, headers: { 'Content-Type': 'application/json' } });
+  }
+}
+
+// Route configuration export: selects the 'edge' runtime for this API route.
+// NOTE(review): presumably read by the framework at build time - keep it a plain object literal.
+// noinspection JSUnusedGlobalSymbols
+export const config = {
+  runtime: 'edge',
+};
@@ -0,0 +1,16 @@
+/**
+ * Types for talking to the ElevenLabs HTTP API.
+ */
+export namespace ElevenLabs.API {
+  /** Optional connection settings; both fields may be omitted. */
+  export interface Configuration {
+    /** API key for ElevenLabs. */
+    apiKey?: string;
+    /** API host name for ElevenLabs. */
+    apiHost?: string;
+  }
+
+  export namespace TextToSpeech {
+    /** Request body of a text-to-speech call. */
+    export interface Request {
+      /** The text to convert to speech. */
+      text: string;
+      /** Optional voice tuning; when present, both values are required. NOTE(review): valid ranges not shown here - confirm against the ElevenLabs API reference. */
+      voice_settings?: {
+        stability: number;
+        similarity_boost: number;
+      };
+    }
+  }
+}