Support for ElevenLabs (disabled by default)

This commit is contained in:
Enrico Ros
2023-04-12 22:50:11 -07:00
parent b7a27ec8cc
commit 876cf93527
5 changed files with 143 additions and 1 deletions
+6 -1
View File
@@ -5,4 +5,9 @@ OPENAI_API_KEY=
OPENAI_API_HOST=
# [Not needed] Sets the "OpenAI-Organization" header field to support organization users (UI > this > '')
OPENAI_API_ORG_ID=
OPENAI_API_ORG_ID=
# [Optional] Sets the API Key and Host for ElevenLabs, for optional text-to-speech
ELEVENLABS_API_KEY=
ELEVENLABS_API_HOST=
ELEVENLABS_VOICE_ID=
+4
View File
@@ -13,6 +13,7 @@ import { ConfirmationModal } from '@/components/dialogs/ConfirmationModal';
import { Link } from '@/components/util/Link';
import { PublishedModal } from '@/components/dialogs/PublishedModal';
import { createDMessage, DMessage, downloadConversationJson, useChatStore } from '@/lib/store-chats';
// import { playLastMessage } from '@/lib/text-to-speech';
import { publishConversation } from '@/lib/publish';
import { streamAssistantMessage, updateAutoConversationTitle } from '@/lib/ai';
import { useSettingsStore } from '@/lib/store-settings';
@@ -62,6 +63,9 @@ const runAssistantUpdatingState = async (conversationId: string, history: DMessa
// clear to send, again
startTyping(conversationId, null);
// play the first paragraph
// playLastMessage(conversationId).then(() => null);
// update text, if needed
await updateAutoConversationTitle(conversationId);
};
+56
View File
@@ -0,0 +1,56 @@
import { ApiElevenLabsSpeechBody } from '../pages/api/elevenlabs/speech';
import { useChatStore } from '@/lib/store-chats';
/**
 * Plays the beginning of the last message in a conversation, via the
 * '/api/elevenlabs/speech' endpoint and the Web Audio API (browser-only).
 *
 * Only the leading paragraph(s) are spoken (capped around 100 characters) to
 * keep the audio short. Playback errors are logged and swallowed so a
 * text-to-speech failure never interrupts the chat flow.
 */
export async function playLastMessage(conversationId: string) {
  const messages = useChatStore.getState().conversations.find(conversation => conversation.id === conversationId)?.messages;
  if (!messages?.length) return;

  // pick the leading paragraph(s) of the last message, capped at ~100 characters
  const text = extractLeadingParagraphs(messages[messages.length - 1].text, 100);
  if (!text) return;

  try {
    const audioBuffer = await convertTextToSpeech(text);
    // decode and play through the Web Audio API
    const audioContext = new AudioContext();
    const bufferSource = audioContext.createBufferSource();
    bufferSource.buffer = await audioContext.decodeAudioData(audioBuffer);
    bufferSource.connect(audioContext.destination);
    bufferSource.start();
  } catch (error) {
    console.error('Error playing first text:', error);
  }
}

/**
 * Returns the leading non-empty paragraph(s) of `fullText`, joined by
 * newlines, stopping before the accumulated length would exceed `maxLength`.
 *
 * Always includes at least the first non-empty paragraph, even when it alone
 * exceeds the cap (fix: the previous logic returned '' in that case, so long
 * single-paragraph messages were silently never spoken). Blank lines are
 * skipped so they cannot inject stray separators.
 */
function extractLeadingParagraphs(fullText: string, maxLength: number = 100): string {
  let text = '';
  for (const paragraph of fullText.split('\n')) {
    const trimmed = paragraph.trim();
    if (!trimmed) continue; // skip blank lines
    if (text && text.length + trimmed.length > maxLength) break;
    text += (text ? '\n' : '') + trimmed;
  }
  return text;
}
/**
 * Requests speech synthesis for `text` from our ElevenLabs proxy endpoint.
 *
 * @param text plain text to convert to speech
 * @returns the raw audio data (served as audio/mpeg) as an ArrayBuffer
 * @throws Error with the server-provided message when the request fails
 */
async function convertTextToSpeech(text: string): Promise<ArrayBuffer> {
  const payload: ApiElevenLabsSpeechBody = {
    text,
  };
  const response = await fetch('/api/elevenlabs/speech', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    // the error body should be JSON, but guard against HTML/plain-text error
    // pages: response.json() would otherwise throw an unrelated SyntaxError
    let message = 'Unknown error';
    try {
      const errorData = await response.json();
      message = errorData.error || errorData.message || message;
    } catch {
      // non-JSON error body: keep the generic message
    }
    throw new Error(message);
  }
  return await response.arrayBuffer();
}
+61
View File
@@ -0,0 +1,61 @@
// noinspection ExceptionCaughtLocallyJS
import { NextRequest, NextResponse } from 'next/server';
import { ApiPublishResponse } from '../publish';
import { ElevenLabs } from '@/types/api-elevenlabs';
/**
 * POSTs a JSON body to the ElevenLabs API and returns the raw Response.
 *
 * Host and API key come from the client-supplied configuration first, then
 * from the server environment, so the key never needs to reach the client.
 *
 * @throws Error when the upstream API responds with a non-2xx status
 */
async function postToElevenLabs<TBody extends object>(configuration: ElevenLabs.API.Configuration, apiPath: string, body: TBody, signal?: AbortSignal): Promise<Response> {
  // normalize the host: strip any leading scheme (http:// or https://) and
  // trailing slashes, instead of removing 'https://' wherever it appears
  const apiHost = (configuration.apiHost || process.env.ELEVENLABS_API_HOST || 'api.elevenlabs.io')
    .trim().replace(/^https?:\/\//, '').replace(/\/+$/, '');
  const apiHeaders: HeadersInit = {
    'Content-Type': 'application/json',
    'xi-api-key': (configuration.apiKey || process.env.ELEVENLABS_API_KEY || '').trim(),
  };
  const response = await fetch(`https://${apiHost}${apiPath}`, {
    method: 'POST',
    headers: apiHeaders,
    body: JSON.stringify(body),
    signal,
  });
  if (!response.ok) {
    // surface the upstream error payload; tolerate non-JSON error bodies
    let errorText: string;
    try {
      errorText = JSON.stringify(await response.json());
    } catch {
      errorText = `status ${response.status}`;
    }
    console.error('Error in ElevenLabs API:', errorText);
    throw new Error('ElevenLabs error: ' + errorText);
  }
  return response;
}
/** Request body for the `/api/elevenlabs/speech` endpoint. */
export interface ApiElevenLabsSpeechBody {
  // optional client-side key/host override; unset fields fall back to server env vars
  api?: ElevenLabs.API.Configuration,
  // plain text to synthesize
  text: string,
  // optional voice ID; falls back to ELEVENLABS_VOICE_ID, then a hard-coded default
  voiceId?: string,
}
/**
 * Edge API handler: converts request text to speech via ElevenLabs and
 * returns the resulting audio as 'audio/mpeg'. On any failure it returns a
 * 500 with a JSON body of shape { type: 'error', error: string }.
 */
export default async function handler(req: NextRequest) {
  try {
    // parse inside the try block, so a malformed JSON body also produces the
    // structured 500 response instead of an unhandled rejection
    const { api = {}, text, voiceId: userVoiceId } = (await req.json()) as ApiElevenLabsSpeechBody;
    if (!text) throw new Error('Missing text');

    // voice resolution: request override > server env > default voice ID
    const voiceId = userVoiceId || process.env.ELEVENLABS_VOICE_ID || '21m00Tcm4TlvDq8ikWAM';
    const response = await postToElevenLabs<ElevenLabs.API.TextToSpeech.Request>(api, `/v1/text-to-speech/${voiceId}`, { text });
    const audioBuffer = await response.arrayBuffer();
    return new NextResponse(audioBuffer, { status: 200, headers: { 'Content-Type': 'audio/mpeg' } });
  } catch (error) {
    console.error('Error posting to ElevenLabs', error);
    // extract the message explicitly: JSON.stringify(new Error(...)) yields '{}',
    // which left the client with no error information
    return new NextResponse(JSON.stringify({
      type: 'error',
      error: error instanceof Error ? error.message : String(error) || 'Network issue',
    } as ApiPublishResponse), { status: 500 });
  }
}
// noinspection JSUnusedGlobalSymbols
export const config = {
  runtime: 'edge', // Next.js route segment config: run this API route on the Edge runtime
};
+16
View File
@@ -0,0 +1,16 @@
/** Shared type definitions for the ElevenLabs text-to-speech integration. */
export namespace ElevenLabs.API {
  /** Connection settings; unset fields fall back to server environment variables. */
  export interface Configuration {
    apiKey?: string;
    apiHost?: string;
  }
  export namespace TextToSpeech {
    /** Request body sent to the `/v1/text-to-speech/{voiceId}` endpoint. */
    export interface Request {
      text: string;
      // optional voice tuning — semantics defined by the ElevenLabs API
      voice_settings?: {
        stability: number;
        similarity_boost: number;
      };
    }
  }
}