mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-10 21:50:14 -07:00
Attachments: youtube links as transcripts
This commit is contained in:
@@ -15,6 +15,7 @@ import TelegramIcon from '@mui/icons-material/Telegram';
|
||||
import TextFieldsIcon from '@mui/icons-material/TextFields';
|
||||
import TextureIcon from '@mui/icons-material/Texture';
|
||||
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
|
||||
import YouTubeIcon from '@mui/icons-material/YouTube';
|
||||
|
||||
import { RenderImageURL } from '~/modules/blocks/image/RenderImageURL';
|
||||
|
||||
@@ -93,6 +94,8 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com
|
||||
'url-page-html': HtmlIcon, // was LanguageIcon
|
||||
'url-page-null': TextureIcon,
|
||||
'url-page-image': ImageOutlinedIcon,
|
||||
'youtube-transcript': YouTubeIcon,
|
||||
'youtube-transcript-simple': YouTubeIcon,
|
||||
'ego-fragments-inlined': TelegramIcon,
|
||||
'unhandled': TextureIcon,
|
||||
};
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { callBrowseFetchPage } from '~/modules/browse/browse.client';
|
||||
import { extractYoutubeVideoIDFromURL } from '~/modules/youtube/youtube.utils';
|
||||
import { youTubeFetchTranscript } from '~/modules/youtube/useYouTubeTranscript';
|
||||
|
||||
import { agiCustomId, agiUuid } from '~/common/util/idUtils';
|
||||
import { htmlTableToMarkdown } from '~/common/util/htmlTableToMarkdown';
|
||||
@@ -8,7 +10,7 @@ import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils';
|
||||
import { createDMessageDataInlineText, createDocAttachmentFragment, DMessageAttachmentFragment, DMessageDataInline, DMessageDocPart, DVMimeType, isContentOrAttachmentFragment, isDocPart, specialContentPartToDocAttachmentFragment } from '~/common/stores/chat/chat.fragments';
|
||||
import { liveFileCreateOrThrow } from '~/common/livefile/store-live-file';
|
||||
|
||||
import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData } from './attachment.types';
|
||||
import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData, DraftYouTubeInputData } from './attachment.types';
|
||||
import type { AttachmentsDraftsStore } from './store-attachment-drafts-slice';
|
||||
import { guessInputContentTypeFromMime, heuristicMimeTypeFixup, mimeTypeIsDocX, mimeTypeIsPDF, mimeTypeIsPlainText, mimeTypeIsSupportedImage, reverseLookupMimeType } from './attachment.mimetypes';
|
||||
import { imageDataToImageAttachmentFragmentViaDBlob } from './attachment.dblobs';
|
||||
@@ -23,6 +25,7 @@ const PDF_IMAGE_QUALITY = 0.5;
|
||||
// internal mimes, only used to route data within us (source -> input -> converters)
|
||||
const INT_MIME_VND_AGI_EGO_FRAGMENTS = 'application/vnd.agi.ego.fragments';
|
||||
const INT_MIME_VND_AGI_WEBPAGE = 'application/vnd.agi.webpage';
|
||||
const INT_MIME_VND_AGI_YOUTUBE = 'application/vnd.agi.youtube';
|
||||
|
||||
|
||||
/**
|
||||
@@ -59,6 +62,29 @@ export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftS
|
||||
// Download URL (page, file, ..) and attach as input
|
||||
case 'url':
|
||||
edit({ label: source.refUrl, ref: source.refUrl });
|
||||
|
||||
// [YouTube] user is attaching a link to a video: try to download this as a transcript rather than a webpage
|
||||
const asYoutubeVideoId = extractYoutubeVideoIDFromURL(source.refUrl);
|
||||
if (asYoutubeVideoId) {
|
||||
const transcript = await youTubeFetchTranscript(asYoutubeVideoId).catch(() => null);
|
||||
if (transcript?.videoTitle && transcript?.transcript) {
|
||||
edit({
|
||||
label: transcript.videoTitle,
|
||||
input: {
|
||||
mimeType: INT_MIME_VND_AGI_YOUTUBE,
|
||||
data: {
|
||||
videoId: asYoutubeVideoId,
|
||||
videoTitle: transcript.videoTitle,
|
||||
videoDescription: transcript.videoDescription,
|
||||
videoThumbnailUrl: transcript.thumbnailUrl,
|
||||
videoTranscript: transcript.transcript,
|
||||
},
|
||||
},
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// fetch the web page
|
||||
const { title, content: { html, markdown, text }, screenshot } = await callBrowseFetchPage(
|
||||
@@ -245,6 +271,12 @@ export function attachmentDefineConverters(source: AttachmentDraftSource, input:
|
||||
}
|
||||
break;
|
||||
|
||||
// YouTube: custom converters
|
||||
case input.mimeType === INT_MIME_VND_AGI_YOUTUBE:
|
||||
converters.push({ id: 'youtube-transcript', name: 'Video Transcript' });
|
||||
converters.push({ id: 'youtube-transcript-simple', name: 'Video Transcript (simple)' });
|
||||
break;
|
||||
|
||||
// EGO
|
||||
case input.mimeType === INT_MIME_VND_AGI_EGO_FRAGMENTS:
|
||||
converters.push({ id: 'ego-fragments-inlined', name: 'Message' });
|
||||
@@ -280,13 +312,17 @@ function _prepareDocData(source: AttachmentDraftSource, input: Readonly<Attachme
|
||||
|
||||
// Downloaded URL as Text, Markdown, or HTML
|
||||
case 'url':
|
||||
let pageTitle = inputMime === INT_MIME_VND_AGI_WEBPAGE ? (input.data as DraftWebInputData)?.pageTitle : undefined;
|
||||
let pageTitle =
|
||||
inputMime === INT_MIME_VND_AGI_WEBPAGE ? (input.data as DraftWebInputData)?.pageTitle
|
||||
: inputMime === INT_MIME_VND_AGI_YOUTUBE ? (input.data as DraftYouTubeInputData)?.videoTitle
|
||||
: undefined;
|
||||
if (!pageTitle)
|
||||
pageTitle = `Web page: ${source.refUrl}`;
|
||||
const urlRefString = inputMime === INT_MIME_VND_AGI_YOUTUBE ? 'youtube-' + (input.data as DraftYouTubeInputData)?.videoId : pageTitle;
|
||||
return {
|
||||
title: pageTitle,
|
||||
caption: converterName,
|
||||
refString: humanReadableHyphenated(pageTitle),
|
||||
refString: humanReadableHyphenated(urlRefString),
|
||||
};
|
||||
|
||||
// File of various kinds and coming from various sources
|
||||
@@ -639,6 +675,22 @@ export async function attachmentPerformConversion(
|
||||
break;
|
||||
|
||||
|
||||
// youtube transcript
|
||||
case 'youtube-transcript':
|
||||
case 'youtube-transcript-simple':
|
||||
if (!input.data || input.mimeType !== INT_MIME_VND_AGI_YOUTUBE) {
|
||||
console.log('Expected YouTubeInputData for youtube-transcript, got:', input.data);
|
||||
break;
|
||||
}
|
||||
const youtubeData = input.data as DraftYouTubeInputData;
|
||||
const transcriptText =
|
||||
converter.id === 'youtube-transcript-simple' ? youtubeData.videoTranscript
|
||||
: `**YouTube Title**: ${youtubeData.videoTitle}\n\n**YouTube Description**: ${youtubeData.videoDescription}\n\n**YouTube Transcript**:\n${youtubeData.videoTranscript}\n`;
|
||||
const transcriptTextData = createDMessageDataInlineText(transcriptText, 'text/plain');
|
||||
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, transcriptTextData, refString, docMeta, undefined));
|
||||
break;
|
||||
|
||||
|
||||
// ego: message
|
||||
case 'ego-fragments-inlined':
|
||||
if (!input.data || input.mimeType !== INT_MIME_VND_AGI_EGO_FRAGMENTS || !(input.data as DraftEgoFragmentsInputData).fragments?.length) {
|
||||
|
||||
@@ -67,7 +67,7 @@ export type AttachmentDraftSourceOriginDTO = 'drop' | 'paste';
|
||||
|
||||
export type AttachmentDraftInput = {
|
||||
mimeType: string; // Original MIME type of the file, or application specific type
|
||||
data: string | ArrayBuffer | DraftWebInputData | DraftEgoFragmentsInputData; // The original data of the attachment
|
||||
data: string | ArrayBuffer | DraftWebInputData | DraftYouTubeInputData | DraftEgoFragmentsInputData; // The original data of the attachment
|
||||
dataSize?: number; // Size of the original data (for plain/simple 1:1 mime)
|
||||
altMimeType?: string; // Alternative MIME type for the input
|
||||
altData?: string; // Alternative data for the input
|
||||
@@ -88,6 +88,14 @@ export type DraftWebInputData = {
|
||||
pageTitle?: string;
|
||||
}
|
||||
|
||||
/**
 * Input data of an attachment draft that was loaded from a YouTube video link
 * (routed internally with the 'application/vnd.agi.youtube' mime type).
 */
export type DraftYouTubeInputData = {
  /** ID of the video, as extracted from the attached URL */
  videoId: string;
  /** Title of the video; also used as the label/title of the attachment */
  videoTitle: string;
  /** Description of the video, included in the full (non-simple) transcript output */
  videoDescription: string;
  /** URL of the video thumbnail image */
  videoThumbnailUrl: string;
  /** Transcript text of the video */
  videoTranscript: string;
}
|
||||
|
||||
export type DraftEgoFragmentsInputData = {
|
||||
fragments: DMessageFragment[];
|
||||
conversationTitle: string;
|
||||
@@ -121,6 +129,7 @@ export type AttachmentDraftConverterType =
|
||||
| 'pdf-text' | 'pdf-images'
|
||||
| 'docx-to-html'
|
||||
| 'url-page-text' | 'url-page-markdown' | 'url-page-html' | 'url-page-null' | 'url-page-image'
|
||||
| 'youtube-transcript' | 'youtube-transcript-simple'
|
||||
| 'ego-fragments-inlined'
|
||||
| 'unhandled';
|
||||
|
||||
|
||||
@@ -5,9 +5,7 @@
|
||||
import * as React from 'react';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
|
||||
import { frontendSideFetch } from '~/common/util/clientFetchers';
|
||||
|
||||
import { fetchYouTubeTranscript } from './youtube.fetcher';
|
||||
// import { fetchYouTubeTranscript } from './youtube.fetcher';
|
||||
import { apiAsync } from '~/common/util/trpc.client';
|
||||
|
||||
// configuration
|
||||
@@ -20,6 +18,15 @@ export interface YTVideoTranscript {
|
||||
thumbnailUrl: string;
|
||||
}
|
||||
|
||||
/**
 * Fetches the transcript of a YouTube video (with its title, description and thumbnail URL), given the video ID.
 *
 * When `USE_FRONTEND_FETCH` is set, the returned promise rejects because the browser-side
 * download is disabled; otherwise the request is served by the `youtube.getTranscript` tRPC query.
 *
 * @param videoId the ID of the YouTube video
 * @throws Error (as a rejected promise) when `USE_FRONTEND_FETCH` is enabled
 */
export async function youTubeFetchTranscript(videoId: string) {
  if (USE_FRONTEND_FETCH) {
    // kept for reference - the former browser-side download path:
    // return fetchYouTubeTranscript(videoId, url => frontendSideFetch(url).then(res => res.text()));
    throw new Error('Big-AGI: Browser youtube transcript download is disabled.');
  }
  return apiAsync.youtube.getTranscript.query({ videoId });
}
|
||||
|
||||
|
||||
export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (transcript: YTVideoTranscript) => void) {
|
||||
|
||||
// state
|
||||
@@ -29,9 +36,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t
|
||||
const { data, isFetching, isError, error } = useQuery({
|
||||
enabled: !!videoID,
|
||||
queryKey: ['transcript', videoID],
|
||||
queryFn: async () => USE_FRONTEND_FETCH
|
||||
? fetchYouTubeTranscript(videoID!, url => frontendSideFetch(url).then(res => res.text()))
|
||||
: apiAsync.youtube.getTranscript.query({ videoId: videoID! }),
|
||||
queryFn: async () => youTubeFetchTranscript(videoID!),
|
||||
staleTime: Infinity,
|
||||
});
|
||||
|
||||
@@ -54,6 +59,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t
|
||||
return {
|
||||
transcript,
|
||||
isFetching,
|
||||
isError, error,
|
||||
isError,
|
||||
error,
|
||||
};
|
||||
}
|
||||
@@ -7,7 +7,7 @@ import { z } from 'zod';
|
||||
import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
|
||||
import { fetchTextOrTRPCThrow } from '~/server/api/trpc.router.fetchers';
|
||||
|
||||
import { fetchYouTubeTranscript } from './youtube.fetcher';
|
||||
import { downloadYouTubeTranscript } from './youtube.server';
|
||||
|
||||
|
||||
const inputSchema = z.object({
|
||||
@@ -24,7 +24,7 @@ export const youtubeRouter = createTRPCRouter({
|
||||
.input(inputSchema)
|
||||
.query(async ({ input }) => {
|
||||
const { videoId } = input;
|
||||
return await fetchYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' }));
|
||||
return await downloadYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' }));
|
||||
}),
|
||||
|
||||
});
|
||||
|
||||
@@ -1,21 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
const youtubeTranscriptionSchema = z.object({
|
||||
wireMagic: z.literal('pb3'),
|
||||
events: z.array(
|
||||
z.object({
|
||||
tStartMs: z.number(),
|
||||
dDurationMs: z.number().optional(),
|
||||
aAppend: z.number().optional(),
|
||||
segs: z.array(
|
||||
z.object({
|
||||
utf8: z.string(),
|
||||
tOffsetMs: z.number().optional(),
|
||||
}),
|
||||
).optional(),
|
||||
}),
|
||||
),
|
||||
});
|
||||
/// THIS IS NORMALLY SERVER-SIDE CODE - do not include/invoke in the frontend ///
|
||||
|
||||
|
||||
function extractFromTo(html: string, from: string, to: string, label: string): string {
|
||||
const indexStart = html.indexOf(from);
|
||||
@@ -29,25 +15,44 @@ function extractFromTo(html: string, from: string, to: string, label: string): s
|
||||
/**
 * Result of downloading a YouTube transcript: the captions text plus the
 * video metadata scraped from the video HTML page.
 */
interface YouTubeTranscriptData {
  /** ID of the video that was requested */
  videoId: string;
  /** Contents of the page <title>, with the ' - YouTube' suffix removed */
  videoTitle: string;
  /** Value of the "shortDescription" field found in the page */
  videoDescription: string;
  /** Thumbnail URL on i.ytimg.com found in the page, with 'maxres' replaced by 'hq' */
  thumbnailUrl: string;
  /** Transcript text derived from the downloaded captions */
  transcript: string;
}
|
||||
|
||||
/**
 * Decodes the HTML character references found in text scraped from a page
 * (e.g. the contents of the <title> element).
 *
 * - the common escapes listed in `entities` are replaced by exact-match lookup
 * - any other numeric reference (decimal `&#NN;` or hexadecimal `&#xHH;`) is decoded to its code point
 * - unknown named references and invalid/out-of-range numeric references are left untouched
 *
 * The text is scanned in a single pass, so the output of a replacement is never decoded again
 * (e.g. `&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param text the text that may contain HTML character references
 * @returns the text with the recognized character references replaced
 */
function decodeHtmlEntities(text: string): string {
  // keys are the full escaped form, exactly as matched by the regex below
  const entities: { [key: string]: string } = {
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&#39;': '\'',
    '&#x2F;': '/',
    '&#x60;': '`',
    '&#x3D;': '=',
  };
  return text.replace(/&(?:#x?[0-9a-f]+|[a-z]+);/gi, (match) => {
    const known = entities[match];
    if (known !== undefined)
      return known;

    // fallback: generic numeric character references (named ones are kept as-is when unknown)
    const numeric = /^&#(?:x([0-9a-f]+)|([0-9]+));$/i.exec(match);
    if (!numeric)
      return match;
    const codePoint = numeric[1] !== undefined
      ? parseInt(numeric[1], 16)
      : parseInt(numeric[2] ?? '', 10);

    // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates are not meaningful text either
    const isDecodable = Number.isInteger(codePoint)
      && codePoint > 0 && codePoint <= 0x10FFFF
      && (codePoint < 0xD800 || codePoint > 0xDFFF);
    return isDecodable ? String.fromCodePoint(codePoint) : match;
  });
}
|
||||
|
||||
export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> {
|
||||
export async function downloadYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> {
|
||||
|
||||
// 1. find the captions URL within the video HTML page
|
||||
const html = await fetchTextFn(`https://www.youtube.com/watch?v=${videoId}`);
|
||||
|
||||
const captionsUrlEnc = extractFromTo(html, 'https://www.youtube.com/api/timedtext', '"', 'Captions URL');
|
||||
const captionsUrl = decodeURIComponent(captionsUrlEnc.replaceAll('\\u0026', '&'));
|
||||
|
||||
const thumbnailUrl = extractFromTo(html, 'https://i.ytimg.com/vi/', '"', 'Thumbnail URL').replaceAll('maxres', 'hq');
|
||||
const videoTitle = extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim();
|
||||
const videoTitle = decodeHtmlEntities(extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim());
|
||||
const videoDescription = extractFromTo(html, ',"shortDescription":"', '","', 'Video Description').slice(21);
|
||||
|
||||
// 2. fetch the captions
|
||||
// note: the desktop player appends this much: &fmt=json3&xorb=2&xobt=3&xovt=3&cbr=Chrome&cbrver=114.0.0.0&c=WEB&cver=2.20230628.07.00&cplayer=UNIPLAYER&cos=Windows&cosver=10.0&cplatform=DESKTOP
|
||||
const captions = await fetchTextFn(captionsUrl + `&fmt=json3`);
|
||||
|
||||
// parse json
|
||||
let captionsJson: any;
|
||||
try {
|
||||
captionsJson = JSON.parse(captions);
|
||||
@@ -55,6 +60,24 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url:
|
||||
console.error(e);
|
||||
throw new Error('[YouTube API Issue] Could not parse the captions');
|
||||
}
|
||||
|
||||
// validate object
|
||||
const youtubeTranscriptionSchema = z.object({
|
||||
wireMagic: z.literal('pb3'),
|
||||
events: z.array(
|
||||
z.object({
|
||||
tStartMs: z.number(),
|
||||
dDurationMs: z.number().optional(),
|
||||
aAppend: z.number().optional(),
|
||||
segs: z.array(
|
||||
z.object({
|
||||
utf8: z.string(),
|
||||
tOffsetMs: z.number().optional(),
|
||||
}),
|
||||
).optional(),
|
||||
}),
|
||||
),
|
||||
});
|
||||
const safeData = youtubeTranscriptionSchema.safeParse(captionsJson);
|
||||
if (!safeData.success) {
|
||||
console.error(safeData.error);
|
||||
@@ -70,6 +93,7 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url:
|
||||
return {
|
||||
videoId,
|
||||
videoTitle,
|
||||
videoDescription,
|
||||
thumbnailUrl,
|
||||
transcript,
|
||||
};
|
||||
Reference in New Issue
Block a user