diff --git a/pages/api/elevenlabs/speech.ts b/pages/api/elevenlabs/speech.ts
index f2105eb8b..18b847698 100644
--- a/pages/api/elevenlabs/speech.ts
+++ b/pages/api/elevenlabs/speech.ts
@@ -56,9 +56,13 @@ export async function postToElevenLabs(apiKey: string, api
export default async function handler(req: NextRequest) {
try {
- const { apiKey = '', text, voiceId: userVoiceId } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
+ const { apiKey = '', text, voiceId: userVoiceId, nonEnglish } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
- const response = await postToElevenLabs(apiKey, `/v1/text-to-speech/${voiceId}`, { text });
+ const requestPayload: ElevenLabs.Wire.TextToSpeech.Request = {
+ text: text,
+ ...(nonEnglish ? { model_id: 'eleven_multilingual_v1' } : {}),
+ };
+ const response = await postToElevenLabs(apiKey, `/v1/text-to-speech/${voiceId}`, requestPayload);
const audioBuffer: ElevenLabs.API.TextToSpeech.Response = await response.arrayBuffer();
return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
} catch (error) {
diff --git a/src/apps/settings/UISettings.tsx b/src/apps/settings/UISettings.tsx
index 9262e70b2..9fe20f5a5 100644
--- a/src/apps/settings/UISettings.tsx
+++ b/src/apps/settings/UISettings.tsx
@@ -127,7 +127,7 @@ export function UISettings() {
-
+
Language
diff --git a/src/modules/elevenlabs/ElevenlabsSettings.tsx b/src/modules/elevenlabs/ElevenlabsSettings.tsx
index 1eaaa672c..b57987a52 100644
--- a/src/modules/elevenlabs/ElevenlabsSettings.tsx
+++ b/src/modules/elevenlabs/ElevenlabsSettings.tsx
@@ -1,6 +1,5 @@
import * as React from 'react';
import { shallow } from 'zustand/shallow';
-import { useQuery } from '@tanstack/react-query';
import { Box, CircularProgress, FormControl, FormHelperText, FormLabel, IconButton, Input, Option, Radio, RadioGroup, Select, Stack } from '@mui/joy';
import KeyIcon from '@mui/icons-material/Key';
@@ -13,8 +12,7 @@ import { Section } from '@/common/components/Section';
import { settingsGap } from '@/common/theme';
import { useSettingsStore } from '@/common/state/store-settings';
-import { ElevenLabs } from './elevenlabs.types';
-import { isValidElevenLabsApiKey, requireUserKeyElevenLabs } from './elevenlabs.client';
+import { isValidElevenLabsApiKey, requireUserKeyElevenLabs, useElevenLabsVoices } from './elevenlabs.client';
export function ElevenlabsSettings() {
@@ -31,16 +29,7 @@ export function ElevenlabsSettings() {
const requiresKey = requireUserKeyElevenLabs;
const isValidKey = apiKey ? isValidElevenLabsApiKey(apiKey) : !requiresKey;
- // load voices, if the server has a key, or the user provided one
- const { data: voicesData, isLoading: loadingVoices } = useQuery(['voices', apiKey], {
- enabled: isValidKey,
- queryFn: () => fetch('/api/elevenlabs/voices', {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ ...(apiKey ? { apiKey: apiKey } : {}) }),
- }).then(res => res.json() as Promise),
- staleTime: 1000 * 60 * 5, // 5 minutes
- });
+ const { voicesData, loadingVoices } = useElevenLabsVoices(apiKey, isValidKey);
const handleToggleApiKeyVisibility = () => setShowApiKeyValue(!showApiKeyValue);
diff --git a/src/modules/elevenlabs/elevenlabs.client.ts b/src/modules/elevenlabs/elevenlabs.client.ts
index 661a6b01f..987cc10b4 100644
--- a/src/modules/elevenlabs/elevenlabs.client.ts
+++ b/src/modules/elevenlabs/elevenlabs.client.ts
@@ -1,4 +1,5 @@
import { ElevenLabs } from './elevenlabs.types';
+import { useQuery } from '@tanstack/react-query';
import { useSettingsStore } from '@/common/state/store-settings';
@@ -10,11 +11,11 @@ export const isValidElevenLabsApiKey = (apiKey?: string) => !!apiKey && apiKey.t
export async function speakText(text: string) {
if (!(text?.trim())) return;
- const { elevenLabsApiKey, elevenLabsVoiceId } = useSettingsStore.getState();
-
+ const { elevenLabsApiKey, elevenLabsVoiceId, preferredLanguage } = useSettingsStore.getState();
try {
// NOTE: hardcoded 1000 as a failsafe, since the API will take very long and consume lots of credits for longer texts
- const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId);
+ const nonEnglish = !(preferredLanguage.toLowerCase().startsWith('en'));
+ const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId, nonEnglish);
const audioContext = new AudioContext();
const bufferSource = audioContext.createBufferSource();
bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
@@ -26,11 +27,12 @@ export async function speakText(text: string) {
}
-async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string): Promise {
+async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string, nonEnglish: boolean): Promise {
const payload: ElevenLabs.API.TextToSpeech.RequestBody = {
apiKey: elevenLabsApiKey,
text,
voiceId: elevenLabsVoiceId,
+ nonEnglish,
};
const response = await fetch('/api/elevenlabs/speech', {
@@ -46,3 +48,35 @@ async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elev
return await response.arrayBuffer();
}
+
+
+/**
+ * React hook that loads the ElevenLabs voices through the app's `/api/elevenlabs/voices` route.
+ *
+ * The request is a JSON POST; `apiKey` is included in the body only when it is non-empty.
+ * Results are cached per API key and treated as fresh for 5 minutes.
+ *
+ * NOTE(review): the type argument of the response cast was not visible in the reviewed patch;
+ * `ElevenLabs.API.Voices.Response` is assumed here - confirm against elevenlabs.types.
+ *
+ * @param apiKey user-provided API key, or an empty string
+ * @param isEnabled the query only runs while this is true
+ * @returns `voicesData` (the parsed response, undefined until loaded) and `loadingVoices`
+ */
+export function useElevenLabsVoices(apiKey: string, isEnabled: boolean) {
+  const { data: voicesData, isLoading: loadingVoices } = useQuery(['elevenlabs-voices', apiKey], {
+    enabled: isEnabled,
+    queryFn: async () => {
+      const res = await fetch('/api/elevenlabs/voices', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ ...(apiKey ? { apiKey } : {}) }),
+      });
+      // surface HTTP failures as query errors instead of caching an error payload as voice data
+      if (!res.ok) throw new Error(`ElevenLabs voices request failed: ${res.status} ${res.statusText}`);
+      return (await res.json()) as ElevenLabs.API.Voices.Response;
+    },
+    staleTime: 1000 * 60 * 5, // 5 minutes
+  });
+  return { voicesData, loadingVoices };
+}
diff --git a/src/modules/elevenlabs/elevenlabs.types.ts b/src/modules/elevenlabs/elevenlabs.types.ts
index 38302d7fd..b85535003 100644
--- a/src/modules/elevenlabs/elevenlabs.types.ts
+++ b/src/modules/elevenlabs/elevenlabs.types.ts
@@ -8,6 +8,7 @@ export namespace ElevenLabs {
apiKey?: string;
text: string;
voiceId?: string;
+ nonEnglish?: boolean; // optional: when omitted/false the handler sends no model_id
}
export type Response = ArrayBuffer;
@@ -36,6 +37,7 @@ export namespace ElevenLabs {
export namespace TextToSpeech {
export interface Request {
text: string;
+ model_id?: 'eleven_monolingual_v1' | string;
voice_settings?: {
stability: number;
similarity_boost: number;