Mirror of https://github.com/enricoros/big-AGI.git (synced 2026-05-11 14:10:15 -07:00)
Support multiLingual Voice Synthesis (Eleven Labs)
By switching the Language setting to something other than English, the better 'multilingual' ElevenLabs model will be selected.
This commit is contained in:
@@ -56,9 +56,13 @@ export async function postToElevenLabs<TBody extends object>(apiKey: string, api
|
||||
|
||||
export default async function handler(req: NextRequest) {
|
||||
try {
|
||||
const { apiKey = '', text, voiceId: userVoiceId } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
|
||||
const { apiKey = '', text, voiceId: userVoiceId, nonEnglish } = (await req.json()) as ElevenLabs.API.TextToSpeech.RequestBody;
|
||||
const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
|
||||
const response = await postToElevenLabs<ElevenLabs.Wire.TextToSpeech.Request>(apiKey, `/v1/text-to-speech/${voiceId}`, { text });
|
||||
const requestPayload: ElevenLabs.Wire.TextToSpeech.Request = {
|
||||
text: text,
|
||||
...(nonEnglish ? { model_id: 'eleven_multilingual_v1' } : {}),
|
||||
};
|
||||
const response = await postToElevenLabs<ElevenLabs.Wire.TextToSpeech.Request>(apiKey, `/v1/text-to-speech/${voiceId}`, requestPayload);
|
||||
const audioBuffer: ElevenLabs.API.TextToSpeech.Response = await response.arrayBuffer();
|
||||
return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
|
||||
} catch (error) {
|
||||
|
||||
@@ -127,7 +127,7 @@ export function UISettings() {
|
||||
|
||||
<FormControl orientation='horizontal' sx={{ alignItems: 'center', justifyContent: 'space-between' }}>
|
||||
<Box>
|
||||
<Tooltip title='Currently for Microphone input only. Language support varies by browser. Note: iPhone/Safari lacks speech input.'>
|
||||
<Tooltip title='Currently for Microphone input and Voice output. Microphone support varies by browser (iPhone/Safari lacks speech input). We will use the ElevenLabs MultiLanguage model if a language other than English is selected.'>
|
||||
<FormLabel>
|
||||
Language <InfoOutlinedIcon sx={{ mx: 0.5 }} />
|
||||
</FormLabel>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import * as React from 'react';
|
||||
import { shallow } from 'zustand/shallow';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
|
||||
import { Box, CircularProgress, FormControl, FormHelperText, FormLabel, IconButton, Input, Option, Radio, RadioGroup, Select, Stack } from '@mui/joy';
|
||||
import KeyIcon from '@mui/icons-material/Key';
|
||||
@@ -13,8 +12,7 @@ import { Section } from '@/common/components/Section';
|
||||
import { settingsGap } from '@/common/theme';
|
||||
import { useSettingsStore } from '@/common/state/store-settings';
|
||||
|
||||
import { ElevenLabs } from './elevenlabs.types';
|
||||
import { isValidElevenLabsApiKey, requireUserKeyElevenLabs } from './elevenlabs.client';
|
||||
import { isValidElevenLabsApiKey, requireUserKeyElevenLabs, useElevenLabsVoices } from './elevenlabs.client';
|
||||
|
||||
|
||||
export function ElevenlabsSettings() {
|
||||
@@ -31,16 +29,7 @@ export function ElevenlabsSettings() {
|
||||
const requiresKey = requireUserKeyElevenLabs;
|
||||
const isValidKey = apiKey ? isValidElevenLabsApiKey(apiKey) : !requiresKey;
|
||||
|
||||
// load voices, if the server has a key, or the user provided one
|
||||
const { data: voicesData, isLoading: loadingVoices } = useQuery(['voices', apiKey], {
|
||||
enabled: isValidKey,
|
||||
queryFn: () => fetch('/api/elevenlabs/voices', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ ...(apiKey ? { apiKey: apiKey } : {}) }),
|
||||
}).then(res => res.json() as Promise<ElevenLabs.API.Voices.Response>),
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
});
|
||||
const { voicesData, loadingVoices } = useElevenLabsVoices(apiKey, isValidKey);
|
||||
|
||||
const handleToggleApiKeyVisibility = () => setShowApiKeyValue(!showApiKeyValue);
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { ElevenLabs } from './elevenlabs.types';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { useSettingsStore } from '@/common/state/store-settings';
|
||||
|
||||
|
||||
@@ -10,11 +11,11 @@ export const isValidElevenLabsApiKey = (apiKey?: string) => !!apiKey && apiKey.t
|
||||
export async function speakText(text: string) {
|
||||
if (!(text?.trim())) return;
|
||||
|
||||
const { elevenLabsApiKey, elevenLabsVoiceId } = useSettingsStore.getState();
|
||||
|
||||
const { elevenLabsApiKey, elevenLabsVoiceId, preferredLanguage } = useSettingsStore.getState();
|
||||
try {
|
||||
// NOTE: hardcoded 1000 as a failsafe, since the API will take very long and consume lots of credits for longer texts
|
||||
const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId);
|
||||
const nonEnglish = !(preferredLanguage.toLowerCase().startsWith('en'));
|
||||
const audioBuffer = await callElevenlabsSpeech(text.slice(0, 1000), elevenLabsApiKey, elevenLabsVoiceId, nonEnglish);
|
||||
const audioContext = new AudioContext();
|
||||
const bufferSource = audioContext.createBufferSource();
|
||||
bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
|
||||
@@ -26,11 +27,12 @@ export async function speakText(text: string) {
|
||||
}
|
||||
|
||||
|
||||
async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string): Promise<ArrayBuffer> {
|
||||
async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elevenLabsVoiceId: string, nonEnglish: boolean): Promise<ArrayBuffer> {
|
||||
const payload: ElevenLabs.API.TextToSpeech.RequestBody = {
|
||||
apiKey: elevenLabsApiKey,
|
||||
text,
|
||||
voiceId: elevenLabsVoiceId,
|
||||
nonEnglish,
|
||||
};
|
||||
|
||||
const response = await fetch('/api/elevenlabs/speech', {
|
||||
@@ -46,3 +48,17 @@ async function callElevenlabsSpeech(text: string, elevenLabsApiKey: string, elev
|
||||
|
||||
return await response.arrayBuffer();
|
||||
}
|
||||
|
||||
|
||||
export function useElevenLabsVoices(apiKey: string, isEnabled: boolean) {
|
||||
const { data: voicesData, isLoading: loadingVoices } = useQuery(['elevenlabs-voices', apiKey], {
|
||||
enabled: isEnabled,
|
||||
queryFn: () => fetch('/api/elevenlabs/voices', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ ...(apiKey ? { apiKey } : {}) }),
|
||||
}).then(res => res.json() as Promise<ElevenLabs.API.Voices.Response>),
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
});
|
||||
return { voicesData, loadingVoices };
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ export namespace ElevenLabs {
|
||||
apiKey?: string;
|
||||
text: string;
|
||||
voiceId?: string;
|
||||
nonEnglish: boolean;
|
||||
}
|
||||
|
||||
export type Response = ArrayBuffer;
|
||||
@@ -36,6 +37,7 @@ export namespace ElevenLabs {
|
||||
export namespace TextToSpeech {
|
||||
export interface Request {
|
||||
text: string;
|
||||
model_id?: 'eleven_monolingual_v1' | string;
|
||||
voice_settings?: {
|
||||
stability: number;
|
||||
similarity_boost: number;
|
||||
|
||||
Reference in New Issue
Block a user