From 3978c50afcc755882b79f018421bc82896c5dc6b Mon Sep 17 00:00:00 2001 From: Enrico Ros Date: Fri, 12 Jul 2024 16:53:28 -0700 Subject: [PATCH] Browsing: enable page screenshot --- .../llmattachments/LLMAttachmentItem.tsx | 1 + .../llmattachments/LLMAttachmentMenu.tsx | 14 ++++++- .../attachment-drafts/attachment.pipeline.ts | 40 ++++++++++++++++--- .../attachment-drafts/attachment.types.ts | 12 +++++- .../attachment-drafts/useAttachmentDrafts.tsx | 2 +- src/modules/browse/browse.client.ts | 10 ++--- 6 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/apps/chat/components/composer/llmattachments/LLMAttachmentItem.tsx b/src/apps/chat/components/composer/llmattachments/LLMAttachmentItem.tsx index 451017d9a..77504cef3 100644 --- a/src/apps/chat/components/composer/llmattachments/LLMAttachmentItem.tsx +++ b/src/apps/chat/components/composer/llmattachments/LLMAttachmentItem.tsx @@ -84,6 +84,7 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com 'pdf-images': PermMediaOutlinedIcon, 'docx-to-html': DescriptionOutlinedIcon, 'ego-fragments-inlined': TelegramIcon, + 'url-image': ImageOutlinedIcon, 'unhandled': TextureIcon, }; diff --git a/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx b/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx index 85f99d24f..502a75520 100644 --- a/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx +++ b/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx @@ -10,8 +10,9 @@ import VerticalAlignBottomIcon from '@mui/icons-material/VerticalAlignBottom'; import { showImageDataRefInNewTab } from '~/modules/blocks/image/RenderImageRefDBlob'; -import { DMessageAttachmentFragment, isDocPart, isImageRefPart } from '~/common/stores/chat/chat.fragments'; import { CloseableMenu } from '~/common/components/CloseableMenu'; +import { DMessageAttachmentFragment, isDocPart, isImageRefPart } from '~/common/stores/chat/chat.fragments'; 
+import { showImageDataURLInNewTab } from '~/common/util/imageUtils'; import type { AttachmentDraftId } from '~/common/attachment-drafts/attachment.types'; import type { AttachmentDraftsStoreApi } from '~/common/attachment-drafts/store-attachment-drafts-slice'; @@ -142,10 +143,19 @@ export function LLMAttachmentMenu(props: { 🡐 {draftInput.altMimeType} · {draftInput.altData?.length.toLocaleString()} )} + {!!draftInput?.urlImage && ( + + 🡐 {draftInput.urlImage.mimeType} · {draftInput.urlImage.width} x {draftInput.urlImage.height} · {draftInput.urlImage.webpDataUrl?.length.toLocaleString()} + {' · '} + showImageDataURLInNewTab(draftInput?.urlImage?.webpDataUrl || '')}> + open + + + )} {/**/} {/* Converters: {aConverters.map(((converter, idx) => ` ${converter.id}${(idx === draft.converterIdx) ? '*' : ''}`)).join(', ')}*/} {/**/} - + {isOutputMissing ? ( 🡒 ... ) : ( diff --git a/src/common/attachment-drafts/attachment.pipeline.ts b/src/common/attachment-drafts/attachment.pipeline.ts index 520226e14..cf0b5b7f1 100644 --- a/src/common/attachment-drafts/attachment.pipeline.ts +++ b/src/common/attachment-drafts/attachment.pipeline.ts @@ -145,12 +145,17 @@ export async function attachmentLoadInputAsync(source: Readonly | undefined = page.title ? { altMimeType: 'application/vnd.agi.title', altData: page.title } : undefined; + const { title, content: { html, markdown, text }, screenshot } = await callBrowseFetchPage( + source.url, undefined, { width: 512, height: 512, quality: 98 }, + ); + // [special] the page title is in the alt mime + const titleObject: Partial | undefined = title ? { altMimeType: 'application/vnd.agi.title', altData: title } : undefined; + // [special] attach the screenshot too, if present + const screenshotObject: Partial | undefined = screenshot ? { urlImage: screenshot } : undefined; edit( - page.content.markdown ? 
{ input: { mimeType: 'text/markdown', data: page.content.markdown, dataSize: page.content.markdown.length, ...titleObject } } - : page.content.text ? { input: { mimeType: 'text/plain', data: page.content.text, dataSize: page.content.text.length, ...titleObject } } - : page.content.html ? { input: { mimeType: 'text/html', data: page.content.html, dataSize: page.content.html.length, ...titleObject } } + markdown ? { input: { mimeType: 'text/markdown', data: markdown, dataSize: markdown.length, ...titleObject, ...screenshotObject } } + : text ? { input: { mimeType: 'text/plain', data: text, dataSize: text.length, ...titleObject, ...screenshotObject } } + : html ? { input: { mimeType: 'text/html', data: html, dataSize: html.length, ...titleObject, ...screenshotObject } } : { inputError: 'No content found at this link' }, ); } catch (error: any) { @@ -310,6 +315,10 @@ export function attachmentDefineConverters(sourceType: AttachmentDraftSource['me break; } + // URL screenshots, independent of the mime + if (input.urlImage) + converters.push({ id: 'url-image', name: 'Screenshot', disabled: !input.urlImage.width || !input.urlImage.height }); + edit({ converters }); } @@ -607,6 +616,27 @@ export async function attachmentPerformConversion( } break; + // urlimage + case 'url-image': + if (!input.urlImage) { + console.log('Expected URL image data for url-image, got:', input.urlImage); + break; + } + try { + // get the data + const { mimeType, webpDataUrl } = input.urlImage; + const dataIndex = webpDataUrl.indexOf(','); + const base64Data = webpDataUrl.slice(dataIndex + 1); + // do not convert, as we're in the optimal webp already + // do not resize, as the 512x512 is optimal for most LLM Vendors, and a great tradeoff of quality/size/cost + const screenshotImageF = await imageDataToImageAttachmentFragmentViaDBlob(mimeType, base64Data, source, `Screenshot of ${title}`, caption, false, false); + if (screenshotImageF) + newFragments.push(screenshotImageF); + } catch (error) { +
console.error('Error attaching screenshot URL image:', error); + } + break; + case 'unhandled': // force the user to explicitly select 'as text' if they want to proceed break; diff --git a/src/common/attachment-drafts/attachment.types.ts b/src/common/attachment-drafts/attachment.types.ts index d070c9ada..e4ddd5ea6 100644 --- a/src/common/attachment-drafts/attachment.types.ts +++ b/src/common/attachment-drafts/attachment.types.ts @@ -38,8 +38,8 @@ export type AttachmentDraftId = string; export type AttachmentDraftSource = { media: 'url'; - url: string; - refUrl: string; + url: string; // parsed valid url + refUrl: string; // original text (use this as text ref, otherwise use the url) } | { media: 'file'; origin: AttachmentDraftSourceOriginFile, @@ -75,6 +75,13 @@ export type AttachmentDraftInput = { dataSize: number; // Size of the original data in bytes altMimeType?: string; // Alternative MIME type for the input altData?: string; // Alternative data for the input + // [media:URL] special for download inputs + urlImage?: { + webpDataUrl: string; + mimeType: string; + width: number; + height: number; + }; // preview?: AttachmentPreview; // Preview of the input }; @@ -99,6 +106,7 @@ export type AttachmentDraftConverterType = | 'pdf-text' | 'pdf-images' | 'docx-to-html' | 'ego-fragments-inlined' + | 'url-image' | 'unhandled'; diff --git a/src/common/attachment-drafts/useAttachmentDrafts.tsx b/src/common/attachment-drafts/useAttachmentDrafts.tsx index 359dd1d77..67dc620aa 100644 --- a/src/common/attachment-drafts/useAttachmentDrafts.tsx +++ b/src/common/attachment-drafts/useAttachmentDrafts.tsx @@ -87,7 +87,7 @@ export const useAttachmentDrafts = (attachmentsStoreApi: AttachmentDraftsStoreAp const textPlain = dt.getData('text/plain') || ''; if (textPlain && enableLoadURLs) { const textPlainUrl = asValidURL(textPlain); - if (textPlainUrl && textPlainUrl) { + if (textPlainUrl && textPlainUrl.trim()) { void _createAttachmentDraft({ media: 'url', url: textPlainUrl, refUrl: 
textPlain, }); diff --git a/src/modules/browse/browse.client.ts b/src/modules/browse/browse.client.ts index 9e68dcf05..9669d3b18 100644 --- a/src/modules/browse/browse.client.ts +++ b/src/modules/browse/browse.client.ts @@ -1,4 +1,4 @@ -import { useBrowseStore } from '~/modules/browse/store-module-browsing'; +import { BrowsePageTransform, useBrowseStore } from '~/modules/browse/store-module-browsing'; import { apiAsyncNode } from '~/common/util/trpc.client'; @@ -11,8 +11,8 @@ const DEBUG_SHOW_SCREENSHOT = false; export async function callBrowseFetchPage( url: string, - // transforms?: BrowsePageTransform[], - // screenshotOptions?: { width: number, height: number, quality?: number }, + transforms?: BrowsePageTransform[], + screenshotOptions?: { width: number, height: number, quality?: number }, ) { // validate url @@ -33,8 +33,8 @@ export async function callBrowseFetchPage( }, requests: [{ url, - transforms: /*transforms ? transforms :*/ [pageTransform], - screenshot: /*screenshotOptions ? screenshotOptions :*/ !DEBUG_SHOW_SCREENSHOT ? undefined : { + transforms: transforms ? transforms : [pageTransform], + screenshot: screenshotOptions ? screenshotOptions : !DEBUG_SHOW_SCREENSHOT ? undefined : { width: 512, height: 512, // quality: 100,