Browsing: enable page screenshot

This commit is contained in:
Enrico Ros
2024-07-12 16:53:28 -07:00
parent 0d25226c30
commit 3978c50afc
6 changed files with 64 additions and 15 deletions
@@ -84,6 +84,7 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com
'pdf-images': PermMediaOutlinedIcon,
'docx-to-html': DescriptionOutlinedIcon,
'ego-fragments-inlined': TelegramIcon,
'url-image': ImageOutlinedIcon,
'unhandled': TextureIcon,
};
@@ -10,8 +10,9 @@ import VerticalAlignBottomIcon from '@mui/icons-material/VerticalAlignBottom';
import { showImageDataRefInNewTab } from '~/modules/blocks/image/RenderImageRefDBlob';
import { DMessageAttachmentFragment, isDocPart, isImageRefPart } from '~/common/stores/chat/chat.fragments';
import { CloseableMenu } from '~/common/components/CloseableMenu';
import { DMessageAttachmentFragment, isDocPart, isImageRefPart } from '~/common/stores/chat/chat.fragments';
import { showImageDataURLInNewTab } from '~/common/util/imageUtils';
import type { AttachmentDraftId } from '~/common/attachment-drafts/attachment.types';
import type { AttachmentDraftsStoreApi } from '~/common/attachment-drafts/store-attachment-drafts-slice';
@@ -142,10 +143,19 @@ export function LLMAttachmentMenu(props: {
<span style={{ color: 'transparent' }}>🡐</span> {draftInput.altMimeType} · {draftInput.altData?.length.toLocaleString()}
</Typography>
)}
{!!draftInput?.urlImage && (
<Typography level='body-sm'>
<span style={{ color: 'transparent' }}>🡐</span> {draftInput.urlImage.mimeType} · {draftInput.urlImage.width} x {draftInput.urlImage.height} · {draftInput.urlImage.webpDataUrl?.length.toLocaleString()}
{' · '}
<Link onClick={() => showImageDataURLInNewTab(draftInput?.urlImage?.webpDataUrl || '')}>
open <LaunchIcon sx={{ mx: 0.5, fontSize: 16 }} />
</Link>
</Typography>
)}
{/*<Typography level='body-sm'>*/}
{/* Converters: {aConverters.map(((converter, idx) => ` ${converter.id}${(idx === draft.converterIdx) ? '*' : ''}`)).join(', ')}*/}
{/*</Typography>*/}
<Box>
<Box sx={{ mt: 1 }}>
{isOutputMissing ? (
<Typography level='body-sm'>🡒 ...</Typography>
) : (
@@ -145,12 +145,17 @@ export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftS
case 'url':
edit({ label: source.refUrl, ref: source.refUrl });
try {
const page = await callBrowseFetchPage(source.url);
const titleObject: Partial<AttachmentDraftInput> | undefined = page.title ? { altMimeType: 'application/vnd.agi.title', altData: page.title } : undefined;
const { title, content: { html, markdown, text }, screenshot } = await callBrowseFetchPage(
source.url, undefined, { width: 512, height: 512, quality: 98 },
);
// [special] the page title is in the alt mime
const titleObject: Partial<AttachmentDraftInput> | undefined = title ? { altMimeType: 'application/vnd.agi.title', altData: title } : undefined;
// [special] attach the screenshot too, if present
const screenshotObject: Partial<AttachmentDraftInput> | undefined = screenshot ? { urlImage: screenshot } : undefined;
edit(
page.content.markdown ? { input: { mimeType: 'text/markdown', data: page.content.markdown, dataSize: page.content.markdown.length, ...titleObject } }
: page.content.text ? { input: { mimeType: 'text/plain', data: page.content.text, dataSize: page.content.text.length, ...titleObject } }
: page.content.html ? { input: { mimeType: 'text/html', data: page.content.html, dataSize: page.content.html.length, ...titleObject } }
markdown ? { input: { mimeType: 'text/markdown', data: markdown, dataSize: markdown.length, ...titleObject, ...screenshotObject } }
: text ? { input: { mimeType: 'text/plain', data: text, dataSize: text.length, ...titleObject, ...screenshotObject } }
: html ? { input: { mimeType: 'text/html', data: html, dataSize: html.length, ...titleObject, ...screenshotObject } }
: { inputError: 'No content found at this link' },
);
} catch (error: any) {
@@ -310,6 +315,10 @@ export function attachmentDefineConverters(sourceType: AttachmentDraftSource['me
break;
}
// URL screenshots, independent of the mime
if (input.urlImage)
converters.push({ id: 'url-image', name: 'Screenshot', disabled: !input.urlImage.width || !input.urlImage.height });
edit({ converters });
}
@@ -607,6 +616,27 @@ export async function attachmentPerformConversion(
}
break;
// urlimage
case 'url-image':
if (!input.urlImage) {
console.log('Expected URL image data for url-image, got:', input.urlImage);
break;
}
try {
// get the data
const { mimeType, webpDataUrl } = input.urlImage;
const dataIndex = webpDataUrl.indexOf(',');
const base64Data = webpDataUrl.slice(dataIndex + 1);
// do not convert, as we're in the optimal webp already
// do not resize, as the 512x512 is optimal for most LLM Vendors, and a great tradeoff of quality/size/cost
const screenshotImageF = await imageDataToImageAttachmentFragmentViaDBlob(mimeType, base64Data, source, `Screenshot of ${title}`, caption, false, false);
if (screenshotImageF)
newFragments.push(screenshotImageF);
} catch (error) {
console.error('Error attaching screenshot URL image:', error);
}
break;
case 'unhandled':
// force the user to explicitly select 'as text' if they want to proceed
break;
@@ -38,8 +38,8 @@ export type AttachmentDraftId = string;
export type AttachmentDraftSource = {
media: 'url';
url: string;
refUrl: string;
url: string; // parsed valid url
refUrl: string; // original text (use this as text ref, otherwise use the url)
} | {
media: 'file';
origin: AttachmentDraftSourceOriginFile,
@@ -75,6 +75,13 @@ export type AttachmentDraftInput = {
dataSize: number; // Size of the original data in bytes
altMimeType?: string; // Alternative MIME type for the input
altData?: string; // Alternative data for the input
// [media:URL] special for download inputs
urlImage?: {
webpDataUrl: string;
mimeType: string;
width: number;
height: number;
};
// preview?: AttachmentPreview; // Preview of the input
};
@@ -99,6 +106,7 @@ export type AttachmentDraftConverterType =
| 'pdf-text' | 'pdf-images'
| 'docx-to-html'
| 'ego-fragments-inlined'
| 'url-image'
| 'unhandled';
@@ -87,7 +87,7 @@ export const useAttachmentDrafts = (attachmentsStoreApi: AttachmentDraftsStoreAp
const textPlain = dt.getData('text/plain') || '';
if (textPlain && enableLoadURLs) {
const textPlainUrl = asValidURL(textPlain);
if (textPlainUrl && textPlainUrl) {
if (textPlainUrl && textPlainUrl.trim()) {
void _createAttachmentDraft({
media: 'url', url: textPlainUrl, refUrl: textPlain,
});
+5 -5
View File
@@ -1,4 +1,4 @@
import { useBrowseStore } from '~/modules/browse/store-module-browsing';
import { BrowsePageTransform, useBrowseStore } from '~/modules/browse/store-module-browsing';
import { apiAsyncNode } from '~/common/util/trpc.client';
@@ -11,8 +11,8 @@ const DEBUG_SHOW_SCREENSHOT = false;
export async function callBrowseFetchPage(
url: string,
// transforms?: BrowsePageTransform[],
// screenshotOptions?: { width: number, height: number, quality?: number },
transforms?: BrowsePageTransform[],
screenshotOptions?: { width: number, height: number, quality?: number },
) {
// validate url
@@ -33,8 +33,8 @@ export async function callBrowseFetchPage(
},
requests: [{
url,
transforms: /*transforms ? transforms :*/ [pageTransform],
screenshot: /*screenshotOptions ? screenshotOptions :*/ !DEBUG_SHOW_SCREENSHOT ? undefined : {
transforms: transforms ? transforms : [pageTransform],
screenshot: screenshotOptions ? screenshotOptions : !DEBUG_SHOW_SCREENSHOT ? undefined : {
width: 512,
height: 512,
// quality: 100,