mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Support for ElevenLabs (disabled by default)
This commit is contained in:
+6
-1
@@ -5,4 +5,9 @@ OPENAI_API_KEY=
|
||||
OPENAI_API_HOST=
|
||||
|
||||
# [Not needed] Sets the "OpenAI-Organization" header field to support organization users (UI > this > '')
|
||||
OPENAI_API_ORG_ID=
|
||||
OPENAI_API_ORG_ID=
|
||||
|
||||
# [Optional] Sets the API Key, Host, and default Voice ID for ElevenLabs text-to-speech
|
||||
ELEVENLABS_API_KEY=
|
||||
ELEVENLABS_API_HOST=
|
||||
ELEVENLABS_VOICE_ID=
|
||||
@@ -13,6 +13,7 @@ import { ConfirmationModal } from '@/components/dialogs/ConfirmationModal';
|
||||
import { Link } from '@/components/util/Link';
|
||||
import { PublishedModal } from '@/components/dialogs/PublishedModal';
|
||||
import { createDMessage, DMessage, downloadConversationJson, useChatStore } from '@/lib/store-chats';
|
||||
// import { playLastMessage } from '@/lib/text-to-speech';
|
||||
import { publishConversation } from '@/lib/publish';
|
||||
import { streamAssistantMessage, updateAutoConversationTitle } from '@/lib/ai';
|
||||
import { useSettingsStore } from '@/lib/store-settings';
|
||||
@@ -62,6 +63,9 @@ const runAssistantUpdatingState = async (conversationId: string, history: DMessa
|
||||
// clear to send, again
|
||||
startTyping(conversationId, null);
|
||||
|
||||
// play the first paragraph
|
||||
// playLastMessage(conversationId).then(() => null);
|
||||
|
||||
// update text, if needed
|
||||
await updateAutoConversationTitle(conversationId);
|
||||
};
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
import { ApiElevenLabsSpeechBody } from '../pages/api/elevenlabs/speech';
|
||||
|
||||
import { useChatStore } from '@/lib/store-chats';
|
||||
|
||||
|
||||
/**
 * Speaks the beginning of the last message of a conversation through the
 * `/api/elevenlabs/speech` route.
 *
 * Leading non-empty paragraphs of the last message are joined until the text is
 * at least 100 characters long (or the message ends), so that a long first
 * paragraph is spoken in full instead of being skipped. Failures are logged to
 * the console and never thrown to the caller.
 *
 * @param conversationId id of the conversation whose last message is spoken;
 *   nothing happens when the conversation is unknown or has no messages.
 */
export async function playLastMessage(conversationId: string) {
  const messages = useChatStore.getState().conversations.find(conversation => conversation.id === conversationId)?.messages;
  if (!messages?.length) return;

  // grab the first paragraph(s) of the last message: keep appending until we have at least 100 characters
  const minSpokenLength = 100;
  let text = '';
  for (const paragraph of messages[messages.length - 1].text.split('\n')) {
    const trimmed = paragraph.trim();
    // blank lines carry nothing to speak and would only add stray separators
    if (!trimmed) continue;
    text += (text.length > 0 ? '\n' : '') + trimmed;
    if (text.length >= minSpokenLength) break;
  }
  if (!text) return;

  try {
    const audioBuffer = await convertTextToSpeech(text);
    const audioContext = new AudioContext();
    // release the audio context once playback ends, or right away if setup fails
    const releaseContext = () => void audioContext.close().catch(() => undefined);
    try {
      const bufferSource = audioContext.createBufferSource();
      bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
      bufferSource.connect(audioContext.destination);
      bufferSource.onended = releaseContext;
      bufferSource.start();
    } catch (error) {
      releaseContext();
      throw error;
    }
  } catch (error) {
    console.error('Error playing first text:', error);
  }
}
|
||||
|
||||
|
||||
/**
 * Requests synthesized speech for `text` from the local `/api/elevenlabs/speech` route.
 *
 * @param text the text to convert to speech
 * @returns the raw response body as an ArrayBuffer
 * @throws Error when the route answers with a non-OK status; the message is the
 *   `error` or `message` field of the JSON error body when one is usable,
 *   otherwise 'Unknown error'
 */
async function convertTextToSpeech(text: string): Promise<ArrayBuffer> {
  const payload: ApiElevenLabsSpeechBody = {
    text,
  };

  const response = await fetch('/api/elevenlabs/speech', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    // the error body is not guaranteed to be JSON (e.g. an HTML error page): never let parsing mask the failure
    const errorData = (await response.json().catch(() => null)) as { error?: unknown, message?: unknown } | null;
    const detail = errorData?.error || errorData?.message;

    // the detail may be a non-string (e.g. a serialized Error becomes {}): avoid '[object Object]' messages
    let message = 'Unknown error';
    if (typeof detail === 'string')
      message = detail;
    else if (detail && typeof detail === 'object' && Object.keys(detail).length > 0)
      message = JSON.stringify(detail);
    throw new Error(message);
  }

  return await response.arrayBuffer();
}
|
||||
@@ -0,0 +1,61 @@
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
|
||||
import { ApiPublishResponse } from '../publish';
|
||||
import { ElevenLabs } from '@/types/api-elevenlabs';
|
||||
|
||||
|
||||
/**
 * POSTs a JSON body to the ElevenLabs API and returns the raw response.
 *
 * The host is taken from `configuration.apiHost`, then the `ELEVENLABS_API_HOST`
 * environment variable, then 'api.elevenlabs.io'. The key is taken from
 * `configuration.apiKey`, then `ELEVENLABS_API_KEY`. The request is always sent over https.
 *
 * @param configuration optional per-request host/key overrides
 * @param apiPath path appended to the host, expected to start with '/'
 * @param body object serialized as the JSON request body
 * @param signal optional abort signal forwarded to fetch
 * @returns the successful (2xx) Response, body not yet consumed
 * @throws Error prefixed with 'ElevenLabs error: ' when the API answers with a non-OK status
 */
async function postToElevenLabs<TBody extends object>(configuration: ElevenLabs.API.Configuration, apiPath: string, body: TBody, signal?: AbortSignal): Promise<Response> {

  // accept hosts written with a scheme (http or https) and/or trailing slashes; only the bare host is kept
  const apiHost = (configuration.apiHost || process.env.ELEVENLABS_API_HOST || 'api.elevenlabs.io')
    .trim()
    .replace(/^https?:\/\//i, '')
    .replace(/\/+$/, '');
  const apiHeaders: HeadersInit = {
    'Content-Type': 'application/json',
    'xi-api-key': (configuration.apiKey || process.env.ELEVENLABS_API_KEY || '').trim(),
  };

  const response = await fetch(`https://${apiHost}${apiPath}`, {
    method: 'POST',
    headers: apiHeaders,
    body: JSON.stringify(body),
    signal,
  });

  if (!response.ok) {
    // the error body is usually JSON, but may be plain text/HTML or empty: read it as text first
    const errorText = await response.text().catch(() => '');
    let errorData: unknown = errorText || `${response.status} ${response.statusText}`.trim();
    try {
      if (errorText) errorData = JSON.parse(errorText);
    } catch {
      // not JSON: keep the raw text
    }
    console.error('Error in ElevenLabs API:', errorData);
    throw new Error('ElevenLabs error: ' + JSON.stringify(errorData));
  }

  return response;
}
|
||||
|
||||
|
||||
/**
 * JSON request body for the ElevenLabs speech API route.
 */
export interface ApiElevenLabsSpeechBody {
  /** The text to be spoken; required. */
  text: string;

  /** Optional voice to use instead of the configured default. */
  voiceId?: string;

  /** Optional per-request API host/key overrides. */
  api?: ElevenLabs.API.Configuration;
}
|
||||
|
||||
|
||||
/**
 * API route handler: converts the `text` of the JSON request body to speech via ElevenLabs.
 *
 * The voice is taken from the request's `voiceId`, then the `ELEVENLABS_VOICE_ID`
 * environment variable, then a built-in default voice id.
 *
 * @param req request whose JSON body is an ApiElevenLabsSpeechBody
 * @returns status 200 with the audio bytes ('audio/mpeg') on success; status 500 with a
 *   JSON body `{ type: 'error', error: <message string> }` on any failure, including a
 *   malformed request body or missing text
 */
export default async function handler(req: NextRequest) {
  try {
    // parse inside the try block, so that a malformed JSON body also produces the JSON error response
    const { api = {}, text, voiceId: userVoiceId } = (await req.json()) as ApiElevenLabsSpeechBody;
    if (!text) throw new Error('Missing text');

    const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';

    // the voice id can come from the client: encode it so it cannot alter the request path
    const response = await postToElevenLabs<ElevenLabs.API.TextToSpeech.Request>(api, `/v1/text-to-speech/${encodeURIComponent(voiceId)}`, { text });
    const audioBuffer = await response.arrayBuffer();
    return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
  } catch (error: unknown) {
    console.error('Error posting to ElevenLabs', error);

    // Error instances serialize to '{}' with JSON.stringify: send the message string instead
    let message = 'Network issue';
    if (error instanceof Error)
      message = error.message || message;
    else if (typeof error === 'string' && error)
      message = error;

    return new NextResponse(JSON.stringify({
      type: 'error',
      error: message,
    } as ApiPublishResponse), { status: 500, headers: { 'Content-Type': 'application/json' } });
  }
}
|
||||
|
||||
/**
 * Route configuration export: selects the 'edge' runtime for this API route.
 */
// noinspection JSUnusedGlobalSymbols
export const config = { runtime: 'edge' };
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
 * Type definitions for the parts of the ElevenLabs API used by this application.
 */
export namespace ElevenLabs.API {

  /** Optional connection settings: the API host and the API key. */
  export interface Configuration {
    apiHost?: string;
    apiKey?: string;
  }

  export namespace TextToSpeech {

    /** Optional voice tuning values sent along with a text-to-speech request. */
    export interface VoiceSettings {
      stability: number;
      similarity_boost: number;
    }

    /** Request body of a text-to-speech call. */
    export interface Request {
      text: string;
      voice_settings?: VoiceSettings;
    }

  }

}
|
||||
Reference in New Issue
Block a user