diff --git a/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx b/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx index 4b09ad013..ed025400c 100644 --- a/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx +++ b/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx @@ -15,6 +15,7 @@ import TelegramIcon from '@mui/icons-material/Telegram'; import TextFieldsIcon from '@mui/icons-material/TextFields'; import TextureIcon from '@mui/icons-material/Texture'; import WarningRoundedIcon from '@mui/icons-material/WarningRounded'; +import YouTubeIcon from '@mui/icons-material/YouTube'; import { RenderImageURL } from '~/modules/blocks/image/RenderImageURL'; @@ -93,6 +94,8 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com 'url-page-html': HtmlIcon, // was LanguageIcon 'url-page-null': TextureIcon, 'url-page-image': ImageOutlinedIcon, + 'youtube-transcript': YouTubeIcon, + 'youtube-transcript-simple': YouTubeIcon, 'ego-fragments-inlined': TelegramIcon, 'unhandled': TextureIcon, }; diff --git a/src/common/attachment-drafts/attachment.pipeline.ts b/src/common/attachment-drafts/attachment.pipeline.ts index 239e8d8b3..64b5615b2 100644 --- a/src/common/attachment-drafts/attachment.pipeline.ts +++ b/src/common/attachment-drafts/attachment.pipeline.ts @@ -1,4 +1,6 @@ import { callBrowseFetchPage } from '~/modules/browse/browse.client'; +import { extractYoutubeVideoIDFromURL } from '~/modules/youtube/youtube.utils'; +import { youTubeFetchTranscript } from '~/modules/youtube/useYouTubeTranscript'; import { agiCustomId, agiUuid } from '~/common/util/idUtils'; import { htmlTableToMarkdown } from '~/common/util/htmlTableToMarkdown'; @@ -8,7 +10,7 @@ import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils'; import { createDMessageDataInlineText, createDocAttachmentFragment, DMessageAttachmentFragment, DMessageDataInline, DMessageDocPart, DVMimeType, 
isContentOrAttachmentFragment, isDocPart, specialContentPartToDocAttachmentFragment } from '~/common/stores/chat/chat.fragments'; import { liveFileCreateOrThrow } from '~/common/livefile/store-live-file'; -import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData } from './attachment.types'; +import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource, DraftEgoFragmentsInputData, DraftWebInputData, DraftYouTubeInputData } from './attachment.types'; import type { AttachmentsDraftsStore } from './store-attachment-drafts-slice'; import { guessInputContentTypeFromMime, heuristicMimeTypeFixup, mimeTypeIsDocX, mimeTypeIsPDF, mimeTypeIsPlainText, mimeTypeIsSupportedImage, reverseLookupMimeType } from './attachment.mimetypes'; import { imageDataToImageAttachmentFragmentViaDBlob } from './attachment.dblobs'; @@ -23,6 +25,7 @@ const PDF_IMAGE_QUALITY = 0.5; // internal mimes, only used to route data within us (source -> input -> converters) const INT_MIME_VND_AGI_EGO_FRAGMENTS = 'application/vnd.agi.ego.fragments'; const INT_MIME_VND_AGI_WEBPAGE = 'application/vnd.agi.webpage'; +const INT_MIME_VND_AGI_YOUTUBE = 'application/vnd.agi.youtube'; /** @@ -59,6 +62,29 @@ export async function attachmentLoadInputAsync(source: Readonly null); + if (transcript?.videoTitle && transcript?.transcript) { + edit({ + label: transcript.videoTitle, + input: { + mimeType: INT_MIME_VND_AGI_YOUTUBE, + data: { + videoId: asYoutubeVideoId, + videoTitle: transcript.videoTitle, + videoDescription: transcript.videoDescription, + videoThumbnailUrl: transcript.thumbnailUrl, + videoTranscript: transcript.transcript, + }, + }, + }); + break; + } + } + try { // fetch the web page const { title, content: { html, markdown, text }, screenshot } = await callBrowseFetchPage( @@ -245,6 +271,12 @@ export function attachmentDefineConverters(source: AttachmentDraftSource, input: } 
break; + // YouTube: custom converters + case input.mimeType === INT_MIME_VND_AGI_YOUTUBE: + converters.push({ id: 'youtube-transcript', name: 'Video Transcript' }); + converters.push({ id: 'youtube-transcript-simple', name: 'Video Transcript (simple)' }); + break; + // EGO case input.mimeType === INT_MIME_VND_AGI_EGO_FRAGMENTS: converters.push({ id: 'ego-fragments-inlined', name: 'Message' }); @@ -280,13 +312,17 @@ function _prepareDocData(source: AttachmentDraftSource, input: Readonly frontendSideFetch(url).then(res => res.text())); + throw new Error('Big-AGI: Browser youtube transcript download is disabled.'); + } + return apiAsync.youtube.getTranscript.query({ videoId }); +} + + export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (transcript: YTVideoTranscript) => void) { // state @@ -29,9 +36,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t const { data, isFetching, isError, error } = useQuery({ enabled: !!videoID, queryKey: ['transcript', videoID], - queryFn: async () => USE_FRONTEND_FETCH - ? fetchYouTubeTranscript(videoID!, url => frontendSideFetch(url).then(res => res.text())) - : apiAsync.youtube.getTranscript.query({ videoId: videoID! 
}), + queryFn: async () => youTubeFetchTranscript(videoID!), staleTime: Infinity, }); @@ -54,6 +59,7 @@ export function useYouTubeTranscript(videoID: string | null, onNewTranscript: (t return { transcript, isFetching, - isError, error, + isError, + error, }; } \ No newline at end of file diff --git a/src/modules/youtube/youtube.router.ts b/src/modules/youtube/youtube.router.ts index 203771683..09e91b9c2 100644 --- a/src/modules/youtube/youtube.router.ts +++ b/src/modules/youtube/youtube.router.ts @@ -7,7 +7,7 @@ import { z } from 'zod'; import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server'; import { fetchTextOrTRPCThrow } from '~/server/api/trpc.router.fetchers'; -import { fetchYouTubeTranscript } from './youtube.fetcher'; +import { downloadYouTubeTranscript } from './youtube.server'; const inputSchema = z.object({ @@ -24,7 +24,7 @@ export const youtubeRouter = createTRPCRouter({ .input(inputSchema) .query(async ({ input }) => { const { videoId } = input; - return await fetchYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' })); + return await downloadYouTubeTranscript(videoId, (url) => fetchTextOrTRPCThrow({ url, name: 'YouTube Transcript' })); }), }); diff --git a/src/modules/youtube/youtube.fetcher.ts b/src/modules/youtube/youtube.server.ts similarity index 58% rename from src/modules/youtube/youtube.fetcher.ts rename to src/modules/youtube/youtube.server.ts index c01cc7a24..ba75b6ceb 100644 --- a/src/modules/youtube/youtube.fetcher.ts +++ b/src/modules/youtube/youtube.server.ts @@ -1,21 +1,7 @@ import { z } from 'zod'; -const youtubeTranscriptionSchema = z.object({ - wireMagic: z.literal('pb3'), - events: z.array( - z.object({ - tStartMs: z.number(), - dDurationMs: z.number().optional(), - aAppend: z.number().optional(), - segs: z.array( - z.object({ - utf8: z.string(), - tOffsetMs: z.number().optional(), - }), - ).optional(), - }), - ), -}); +/// THIS IS NORMALLY SERVER-SIDE CODE - do not 
include/invoke in the frontend /// + function extractFromTo(html: string, from: string, to: string, label: string): string { const indexStart = html.indexOf(from); @@ -29,25 +15,44 @@ function extractFromTo(html: string, from: string, to: string, label: string): s interface YouTubeTranscriptData { videoId: string; videoTitle: string; + videoDescription: string; thumbnailUrl: string; transcript: string; } +function decodeHtmlEntities(text: string): string { + const entities: { [key: string]: string } = { + '&amp;': '&', + '&lt;': '<', + '&gt;': '>', + '&quot;': '"', + '&#39;': '\'', + '&#x2F;': '/', + '&#x60;': '`', + '&#x3D;': '=', + }; + return text.replace(/&(?:#x?[0-9a-f]+|[a-z]+);/gi, (match) => + entities[match] || match, + ); +} -export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> { +export async function downloadYouTubeTranscript(videoId: string, fetchTextFn: (url: string) => Promise<string>): Promise<YouTubeTranscriptData> { // 1. find the captions URL within the video HTML page const html = await fetchTextFn(`https://www.youtube.com/watch?v=${videoId}`); const captionsUrlEnc = extractFromTo(html, 'https://www.youtube.com/api/timedtext', '"', 'Captions URL'); const captionsUrl = decodeURIComponent(captionsUrlEnc.replaceAll('\\u0026', '&')); + const thumbnailUrl = extractFromTo(html, 'https://i.ytimg.com/vi/', '"', 'Thumbnail URL').replaceAll('maxres', 'hq'); - const videoTitle = extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim(); + const videoTitle = decodeHtmlEntities(extractFromTo(html, '<title>', '</title>', 'Video Title').slice(7).replaceAll(' - YouTube', '').trim()); + const videoDescription = extractFromTo(html, ',"shortDescription":"', '","', 'Video Description').slice(21); // 2.
fetch the captions // note: the desktop player appends this much: &fmt=json3&xorb=2&xobt=3&xovt=3&cbr=Chrome&cbrver=114.0.0.0&c=WEB&cver=2.20230628.07.00&cplayer=UNIPLAYER&cos=Windows&cosver=10.0&cplatform=DESKTOP const captions = await fetchTextFn(captionsUrl + `&fmt=json3`); + // parse json let captionsJson: any; try { captionsJson = JSON.parse(captions); @@ -55,6 +60,24 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url: console.error(e); throw new Error('[YouTube API Issue] Could not parse the captions'); } + + // validate object + const youtubeTranscriptionSchema = z.object({ + wireMagic: z.literal('pb3'), + events: z.array( + z.object({ + tStartMs: z.number(), + dDurationMs: z.number().optional(), + aAppend: z.number().optional(), + segs: z.array( + z.object({ + utf8: z.string(), + tOffsetMs: z.number().optional(), + }), + ).optional(), + }), + ), + }); const safeData = youtubeTranscriptionSchema.safeParse(captionsJson); if (!safeData.success) { console.error(safeData.error); @@ -70,6 +93,7 @@ export async function fetchYouTubeTranscript(videoId: string, fetchTextFn: (url: return { videoId, videoTitle, + videoDescription, thumbnailUrl, transcript, };