Attachments: youtube links as transcripts

This commit is contained in:
Enrico Ros
2024-08-06 00:37:31 -07:00
parent 85aed347cf
commit 67184536a6
6 changed files with 125 additions and 31 deletions
@@ -15,6 +15,7 @@ import TelegramIcon from '@mui/icons-material/Telegram';
import TextFieldsIcon from '@mui/icons-material/TextFields';
import TextureIcon from '@mui/icons-material/Texture';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import YouTubeIcon from '@mui/icons-material/YouTube';
import { RenderImageURL } from '~/modules/blocks/image/RenderImageURL';
@@ -93,6 +94,8 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com
'url-page-html': HtmlIcon, // was LanguageIcon
'url-page-null': TextureIcon,
'url-page-image': ImageOutlinedIcon,
'youtube-transcript': YouTubeIcon,
'youtube-transcript-simple': YouTubeIcon,
'ego-fragments-inlined': TelegramIcon,
'unhandled': TextureIcon,
};
@@ -1,4 +1,6 @@
import { callBrowseFetchPage } from '~/modules/browse/browse.client';
import { extractYoutubeVideoIDFromURL } from '~/modules/youtube/youtube.utils';
import { youTubeFetchTranscript } from '~/modules/youtube/useYouTubeTranscript';
import { agiCustomId, agiUuid } from '~/common/util/idUtils';
import { htmlTableToMarkdown } from '~/common/util/htmlTableToMarkdown';
@@ -8,7 +10,7 @@ import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils';
import { createDMessageDataInlineText, createDocAttachmentFragment, DMessageAttachmentFragment, DMessageDataInline, DMessageDocPart, DVMimeType, isContentOrAttachmentFragment, isDocPart, specialContentPartToDocAttachmentFragment } from '~/common/stores/chat/chat.fragments';
import { liveFileCreateOrThrow } from '~/common/livefile/store-live-file';
import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData } from './attachment.types';
import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData, DraftYouTubeInputData } from './attachment.types';
import type { AttachmentsDraftsStore } from './store-attachment-drafts-slice';
import { guessInputContentTypeFromMime, heuristicMimeTypeFixup, mimeTypeIsDocX, mimeTypeIsPDF, mimeTypeIsPlainText, mimeTypeIsSupportedImage, reverseLookupMimeType } from './attachment.mimetypes';
import { imageDataToImageAttachmentFragmentViaDBlob } from './attachment.dblobs';
@@ -23,6 +25,7 @@ const PDF_IMAGE_QUALITY = 0.5;
// internal mimes, only used to route data within us (source -> input -> converters)
const INT_MIME_VND_AGI_EGO_FRAGMENTS = 'application/vnd.agi.ego.fragments';
const INT_MIME_VND_AGI_WEBPAGE = 'application/vnd.agi.webpage';
const INT_MIME_VND_AGI_YOUTUBE = 'application/vnd.agi.youtube';
/**
@@ -59,6 +62,29 @@ export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftS
// Download URL (page, file, ..) and attach as input
case 'url':
edit({ label: source.refUrl, ref: source.refUrl });
// [YouTube] user is attaching a link to a video: try to download this as a transcript rather than a webpage
const asYoutubeVideoId = extractYoutubeVideoIDFromURL(source.refUrl);
if (asYoutubeVideoId) {
const transcript = await youTubeFetchTranscript(asYoutubeVideoId).catch(() => null);
if (transcript?.videoTitle && transcript?.transcript) {
edit({
label: transcript.videoTitle,
input: {
mimeType: INT_MIME_VND_AGI_YOUTUBE,
data: {
videoId: asYoutubeVideoId,
videoTitle: transcript.videoTitle,
videoDescription: transcript.videoDescription,
videoThumbnailUrl: transcript.thumbnailUrl,
videoTranscript: transcript.transcript,
},
},
});
break;
}
}
try {
// fetch the web page
const { title, content: { html, markdown, text }, screenshot } = await callBrowseFetchPage(
@@ -245,6 +271,12 @@ export function attachmentDefineConverters(source: AttachmentDraftSource, input:
}
break;
// YouTube: custom converters
case input.mimeType === INT_MIME_VND_AGI_YOUTUBE:
converters.push({ id: 'youtube-transcript', name: 'Video Transcript' });
converters.push({ id: 'youtube-transcript-simple', name: 'Video Transcript (simple)' });
break;
// EGO
case input.mimeType === INT_MIME_VND_AGI_EGO_FRAGMENTS:
converters.push({ id: 'ego-fragments-inlined', name: 'Message' });
@@ -280,13 +312,17 @@ function _prepareDocData(source: AttachmentDraftSource, input: Readonly<Attachme
// Downloaded URL as Text, Markdown, or HTML
case 'url':
let pageTitle = inputMime === INT_MIME_VND_AGI_WEBPAGE ? (input.data as DraftWebInputData)?.pageTitle : undefined;
let pageTitle =
inputMime === INT_MIME_VND_AGI_WEBPAGE ? (input.data as DraftWebInputData)?.pageTitle
: inputMime === INT_MIME_VND_AGI_YOUTUBE ? (input.data as DraftYouTubeInputData)?.videoTitle
: undefined;
if (!pageTitle)
pageTitle = `Web page: ${source.refUrl}`;
const urlRefString = inputMime === INT_MIME_VND_AGI_YOUTUBE ? 'youtube-' + (input.data as DraftYouTubeInputData)?.videoId : pageTitle;
return {
title: pageTitle,
caption: converterName,
refString: humanReadableHyphenated(pageTitle),
refString: humanReadableHyphenated(urlRefString),
};
// File of various kinds and coming from various sources
@@ -639,6 +675,22 @@ export async function attachmentPerformConversion(
break;
// youtube transcript
case 'youtube-transcript':
case 'youtube-transcript-simple':
if (!input.data || input.mimeType !== INT_MIME_VND_AGI_YOUTUBE) {
console.log('Expected YouTubeInputData for youtube-transcript, got:', input.data);
break;
}
const youtubeData = input.data as DraftYouTubeInputData;
const transcriptText =
converter.id === 'youtube-transcript-simple' ? youtubeData.videoTranscript
: `**YouTube Title**: ${youtubeData.videoTitle}\n\n**YouTube Description**: ${youtubeData.videoDescription}\n\n**YouTube Transcript**:\n${youtubeData.videoTranscript}\n`;
const transcriptTextData = createDMessageDataInlineText(transcriptText, 'text/plain');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, transcriptTextData, refString, docMeta, undefined));
break;
// ego: message
case 'ego-fragments-inlined':
if (!input.data || input.mimeType !== INT_MIME_VND_AGI_EGO_FRAGMENTS || !(input.data as DraftEgoFragmentsInputData).fragments?.length) {
@@ -67,7 +67,7 @@ export type AttachmentDraftSourceOriginDTO = 'drop' | 'paste';
export type AttachmentDraftInput = {
mimeType: string; // Original MIME type of the file, or application specific type
data: string | ArrayBuffer | DraftWebInputData | DraftEgoFragmentsInputData; // The original data of the attachment
data: string | ArrayBuffer | DraftWebInputData | DraftYouTubeInputData | DraftEgoFragmentsInputData; // The original data of the attachment
dataSize?: number; // Size of the original data (for plain/simple 1:1 mime)
altMimeType?: string; // Alternative MIME type for the input
altData?: string; // Alternative data for the input
@@ -88,6 +88,14 @@ export type DraftWebInputData = {
pageTitle?: string;
}
/**
 * Input payload for a YouTube video attached by link.
 * Carried in `AttachmentDraftInput.data` and later turned into a text transcript attachment.
 */
export type DraftYouTubeInputData = {
videoId: string; // ID of the video, as extracted from the attached URL
videoTitle: string; // Title of the video; also used as the attachment label/title
videoDescription: string; // Description of the video (included in the non-'simple' transcript output)
videoThumbnailUrl: string; // URL of the video thumbnail image
videoTranscript: string; // Transcript text of the video
}
export type DraftEgoFragmentsInputData = {
fragments: DMessageFragment[];
conversationTitle: string;
@@ -121,6 +129,7 @@ export type AttachmentDraftConverterType =
| 'pdf-text' | 'pdf-images'
| 'docx-to-html'
| 'url-page-text' | 'url-page-markdown' | 'url-page-html' | 'url-page-null' | 'url-page-image'
| 'youtube-transcript' | 'youtube-transcript-simple'
| 'ego-fragments-inlined'
| 'unhandled';
+13 -7
View File
@@ -5,9 +5,7 @@
import * as React from 'react';
import { useQuery } from '@tanstack/react-query';
import { frontendSideFetch } from '~/common/util/clientFetchers';
import { fetchYouTubeTranscript } from './youtube.fetcher';
// import { fetchYouTubeTranscript } from './youtube.fetcher';
import { apiAsync } from '~/common/util/trpc.client';
// configuration
@@ -20,6 +18,15 @@ export interface YTVideoTranscript {
thumbnailUrl: string;
}
/**
 * Retrieves the transcript (and related video metadata) for a YouTube video.
 *
 * The request goes through the `youtube.getTranscript` API query. The in-browser
 * download path is currently disabled: if USE_FRONTEND_FETCH is set, this rejects.
 *
 * @param videoId the ID of the YouTube video
 * @throws Error when the frontend fetch mode is selected (disabled)
 */
export async function youTubeFetchTranscript(videoId: string) {
  // default path: ask the API to download the transcript
  if (!USE_FRONTEND_FETCH)
    return apiAsync.youtube.getTranscript.query({ videoId });

  // frontend path: intentionally disabled
  // return fetchYouTubeTranscript(videoId, url => frontendSideFetch(url).then(res => res.text()));
  throw new Error('Big-AGI: Browser youtube transcript download is disabled.');
}
export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (transcript: YTVideoTranscript) => void) {
// state
@@ -29,9 +36,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t
const { data, isFetching, isError, error } = useQuery({
enabled: !!videoID,
queryKey: ['transcript', videoID],
queryFn: async () => USE_FRONTEND_FETCH
? fetchYouTubeTranscript(videoID!, url => frontendSideFetch(url).then(res => res.text()))
: apiAsync.youtube.getTranscript.query({ videoId: videoID! }),
queryFn: async () => youTubeFetchTranscript(videoID!),
staleTime: Infinity,
});
@@ -54,6 +59,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t
return {
transcript,
isFetching,
isError, error,
isError,
error,
};
}
+2 -2
View File
@@ -7,7 +7,7 @@ import { z } from 'zod';
import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
import { fetchTextOrTRPCThrow } from '~/server/api/trpc.router.fetchers';
import { fetchYouTubeTranscript } from './youtube.fetcher';
import { downloadYouTubeTranscript } from './youtube.server';
const inputSchema = z.object({
@@ -24,7 +24,7 @@ export const youtubeRouter = createTRPCRouter({
.input(inputSchema)
.query(async ({ input }) => {
const { videoId } = input;
return await fetchYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' }));
return await downloadYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' }));
}),
});
@@ -1,21 +1,7 @@
import { z } from 'zod';
/**
 * Zod schema for the parsed captions JSON (requested with `fmt=json3`).
 * Requires the literal `wireMagic: 'pb3'` marker and a list of timed events,
 * each optionally carrying text segments (`segs[].utf8`).
 */
const youtubeTranscriptionSchema = z.object({
wireMagic: z.literal('pb3'),
events: z.array(
z.object({
tStartMs: z.number(),
dDurationMs: z.number().optional(),
aAppend: z.number().optional(),
segs: z.array(
z.object({
utf8: z.string(),
tOffsetMs: z.number().optional(),
}),
).optional(),
}),
),
});
/// THIS IS NORMALLY SERVER-SIDE CODE - do not include/invoke in the frontend ///
function extractFromTo(html: string, from: string, to: string, label: string): string {
const indexStart = html.indexOf(from);
@@ -29,25 +15,44 @@ function extractFromTo(html: string, from: string, to: string, label: string): s
/**
 * Result of downloading a YouTube transcript: the transcript text plus
 * basic metadata scraped from the video's watch page.
 */
interface YouTubeTranscriptData {
videoId: string; // the ID the transcript was requested for
videoTitle: string; // from the page <title>, with the ' - YouTube' suffix removed
videoDescription: string; // from the "shortDescription" field embedded in the page
thumbnailUrl: string; // i.ytimg.com thumbnail URL, with 'maxres' replaced by 'hq'
transcript: string; // the transcript text
}
/**
 * Decodes HTML character references found in a string (e.g. a page `<title>`).
 *
 * Handles:
 *  - numeric references, decimal (`&#39;`, `&#8217;`) and hexadecimal (`&#x27;`, `&#X2F;`),
 *    for any valid Unicode code point;
 *  - a small set of common named references (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;`).
 *
 * Anything not recognized (unknown names, out-of-range or surrogate code points) is left
 * untouched. Decoding is single-pass, so `&amp;lt;` becomes `&lt;` and not `<`.
 *
 * @param text the text possibly containing HTML character references
 * @returns the text with the recognized references replaced by their characters
 */
function decodeHtmlEntities(text: string): string {
  // named references are case-sensitive in HTML (e.g. `&Lt;` is not `&lt;`), so keys are matched exactly
  const namedEntities: { [name: string]: string | undefined } = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: '\'',
    nbsp: '\u00A0',
  };

  return text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (match: string, body: string) => {
    // named reference: look it up, or keep the original text
    if (!body.startsWith('#'))
      return namedEntities[body] ?? match;

    // numeric reference: the 'x' hex prefix is case-insensitive
    const isHex = body.charAt(1).toLowerCase() === 'x';
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);

    // reject NUL, lone surrogates and values beyond the Unicode range (String.fromCodePoint would throw)
    const isValid = Number.isInteger(codePoint)
      && codePoint > 0
      && codePoint <= 0x10FFFF
      && !(codePoint >= 0xD800 && codePoint <= 0xDFFF);
    return isValid ? String.fromCodePoint(codePoint) : match;
  });
}
export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> {
export async function downloadYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> {
// 1. find the captions URL within the video HTML page
const html = await fetchTextFn(`https://www.youtube.com/watch?v=${videoId}`);
const captionsUrlEnc = extractFromTo(html, 'https://www.youtube.com/api/timedtext', '"', 'Captions URL');
const captionsUrl = decodeURIComponent(captionsUrlEnc.replaceAll('\\u0026', '&'));
const thumbnailUrl = extractFromTo(html, 'https://i.ytimg.com/vi/', '"', 'Thumbnail URL').replaceAll('maxres', 'hq');
const videoTitle = extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim();
const videoTitle = decodeHtmlEntities(extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim());
const videoDescription = extractFromTo(html, ',"shortDescription":"', '","', 'Video Description').slice(21);
// 2. fetch the captions
// note: the desktop player appends this much: &fmt=json3&xorb=2&xobt=3&xovt=3&cbr=Chrome&cbrver=114.0.0.0&c=WEB&cver=2.20230628.07.00&cplayer=UNIPLAYER&cos=Windows&cosver=10.0&cplatform=DESKTOP
const captions = await fetchTextFn(captionsUrl + `&fmt=json3`);
// parse json
let captionsJson: any;
try {
captionsJson = JSON.parse(captions);
@@ -55,6 +60,24 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url:
console.error(e);
throw new Error('[YouTube API Issue] Could not parse the captions');
}
// validate object
const youtubeTranscriptionSchema = z.object({
wireMagic: z.literal('pb3'),
events: z.array(
z.object({
tStartMs: z.number(),
dDurationMs: z.number().optional(),
aAppend: z.number().optional(),
segs: z.array(
z.object({
utf8: z.string(),
tOffsetMs: z.number().optional(),
}),
).optional(),
}),
),
});
const safeData = youtubeTranscriptionSchema.safeParse(captionsJson);
if (!safeData.success) {
console.error(safeData.error);
@@ -70,6 +93,7 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url:
return {
videoId,
videoTitle,
videoDescription,
thumbnailUrl,
transcript,
};