Mirror of https://github.com/enricoros/big-AGI.git (synced 2026-05-11 14:10:15 -07:00)
Support multiLingual Voice Synthesis (Eleven Labs)
By switching the Language setting to something other than English, the better 'multilingual' ElevenLabs model will be selected.
This commit is contained in:
@@ -56,9 +56,13 @@ export async function postToElevenLabs<TBody extends object>(apiKey: string, api
|
||||
|
||||
export default async function handler(req: NextRequest) {
|
||||
try {
|
||||
const { apiKey = '', text, voiceId: userVoiceId } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
|
||||
const { apiKey = '', text, voiceId: userVoiceId, nonEnglish } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
|
||||
const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
|
||||
const response = await postToElevenLabs<ElevenLabs.Wire.TextToSpeech.Request>(apiKey, `/v1/text-to-speech/${voiceId}`, { text });
|
||||
const requestPayload: ElevenLabs.Wire.TextToSpeech.Request = {
|
||||
text: text,
|
||||
...(nonEnglish ? { model_id: 'eleven_multilingual_v1' } : {}),
|
||||
};
|
||||
const response = await postToElevenLabs<ElevenLabs.Wire.TextToSpeech.Request>(apiKey, `/v1/text-to-speech/${voiceId}`, requestPayload);
|
||||
const audioBuffer: ElevenLabs.API.TextToSpeech.Response = await response.arrayBuffer();
|
||||
return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
|
||||
} catch (error) {
|
||||
|
||||
@@ -127,7 +127,7 @@ export function UISettings() {
|
||||
|
||||
<FormControl orientation='horizontal' sx={{ alignItems: 'center', justifyContent: 'space-between' }}>
|
||||
<Box>
|
||||
<Tooltip title='Currently for Microphone input only. Language support varies by browser. Note: iPhone/Safari lacks speech input.'>
|
||||
<Tooltip title='Currently for Microphone input and Voice output. Microphone support varies by browser (iPhone/Safari lacks speech input). We will use the ElevenLabs MultiLanguage model if a language other than English is selected.'>
|
||||
<FormLabel>
|
||||
Language <InfoOutlinedIcon sx={{ mx: 0.5 }} />
|
||||
</FormLabel>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import * as React from 'react';
|
||||
import { shallow } from 'zustand/shallow';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
|
||||
import { Box, CircularProgress, FormControl, FormHelperText, FormLabel, IconButton, Input, Option, Radio, RadioGroup, Select, Stack } from '@mui/joy';
|
||||
import KeyIcon from '@mui/icons-material/Key';
|
||||
@@ -13,8 +12,7 @@ import { Section } from '@/common/components/Section';
|
||||
import { settingsGap } from '@/common/theme';
|
||||
import { useSettingsStore } from '@/common/state/store-settings';
|
||||
|
||||
import { ElevenLabs } from './elevenlabs.types';
|
||||
import { isValidElevenLabsApiKey, requireUserKeyElevenLabs } from './elevenlabs.client';
|
||||
import { isValidElevenLabsApiKey, requireUserKeyElevenLabs, useElevenLabsVoices } from './elevenlabs.client';
|
||||
|
||||
|
||||
export function ElevenlabsSettings() {
|
||||
@@ -31,16 +29,7 @@ export function ElevenlabsSettings() {
|
||||
const requiresKey = requireUserKeyElevenLabs;
|
||||
const isValidKey = apiKey ? isValidElevenLabsApiKey(apiKey) : !requiresKey;
|
||||
|
||||
// load voices, if the server has a key, or the user provided one
|
||||
const { data: voicesData, isLoading: loadingVoices } = useQuery(['voices', apiKey], {
|
||||
enabled: isValidKey,
|
||||
queryFn: () => fetch('/api/elevenlabs/voices', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ ...(apiKey ? { apiKey: apiKey } : {}) }),
|
||||
}).then(res => res.json() as Promise<ElevenLabs.API.Voices.Response>),
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
});
|
||||
const { voicesData, loadingVoices } = useElevenLabsVoices(apiKey, isValidKey);
|
||||
|
||||
const handleToggleApiKeyVisibility = () => setShowApiKeyValue(!showApiKeyValue);
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ElevenLabs } from './elevenlabs.types';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { useSettingsStore } from '@/common/state/store-settings';
|
||||
|
||||
|
||||
@@ -10,11 +11,11 @@ export const isValidElevenLabsApiKey = (apiKey?: string) => !!apiKey && apiKey.t
|
||||
export async function speakText(text: string) {
|
||||
if (!(text?.trim())) return;
|
||||
|
||||
const { elevenLabsApiKey, elevenLabsVoiceId } = useSettingsStore.getState();
|
||||
|
||||
const { elevenLabsApiKey, elevenLabsVoiceId, preferredLanguage } = useSettingsStore.getState();
|
||||
try {
|
||||
// NOTE: hardcoded 1000 as a failsafe, since the API will take very long and consume lots of credits for longer texts
|
||||
const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId);
|
||||
const nonEnglish = !(preferredLanguage.toLowerCase().startsWith('en'));
|
||||
const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId, nonEnglish);
|
||||
const audioContext = new AudioContext();
|
||||
const bufferSource = audioContext.createBufferSource();
|
||||
bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
|
||||
@@ -26,11 +27,12 @@ export async function speakText(text: string) {
|
||||
}
|
||||
|
||||
|
||||
async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string): Promise<ArrayBuffer> {
|
||||
async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string, nonEnglish: boolean): Promise<ArrayBuffer> {
|
||||
const payload: ElevenLabs.API.TextToSpeech.RequestBody = {
|
||||
apiKey: elevenLabsApiKey,
|
||||
text,
|
||||
voiceId: elevenLabsVoiceId,
|
||||
nonEnglish,
|
||||
};
|
||||
|
||||
const response = await fetch('/api/elevenlabs/speech', {
|
||||
@@ -46,3 +48,17 @@ async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elev
|
||||
|
||||
return await response.arrayBuffer();
|
||||
}
|
||||
|
||||
|
||||
export function useElevenLabsVoices(apiKey: string, isEnabled: boolean) {
|
||||
const { data: voicesData, isLoading: loadingVoices } = useQuery(['elevenlabs-voices', apiKey], {
|
||||
enabled: isEnabled,
|
||||
queryFn: () => fetch('/api/elevenlabs/voices', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ ...(apiKey ? { apiKey } : {}) }),
|
||||
}).then(res => res.json() as Promise<ElevenLabs.API.Voices.Response>),
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
});
|
||||
return { voicesData, loadingVoices };
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ export namespace ElevenLabs {
|
||||
apiKey?: string;
|
||||
text: string;
|
||||
voiceId?: string;
|
||||
nonEnglish: boolean;
|
||||
}
|
||||
|
||||
export type Response = ArrayBuffer;
|
||||
@@ -36,6 +37,7 @@ export namespace ElevenLabs {
|
||||
export namespace TextToSpeech {
|
||||
export interface Request {
|
||||
text: string;
|
||||
model_id?: 'eleven_monolingual_v1' | string;
|
||||
voice_settings?: {
|
||||
stability: number;
|
||||
similarity_boost: number;
|
||||
|
||||
Reference in New Issue
Block a user