Patch summary (free-form preamble; patch tools skip text before the first "diff --git" header):

  browse: let the caller choose how a fetched page is transformed.

  * browse.client.ts: callBrowseFetchPage now sends transform: 'markdown'
    alongside the url in each subject.
  * browse.router.ts: adds pageTransformSchema (one of 'html', 'text',
    'markdown'), makes `transform` a field of every subject in
    fetchPageInputSchema, and forwards subject.transform to workerPuppeteer
    as a new third positional parameter (before the screenshot arguments).

  NOTE(review): this patch was reconstructed from a copy whose newlines were
  collapsed and whose generic type arguments were stripped. The `<typeof ...>`
  and `Promise<...>` arguments below were restored; the schema name
  `fetchPageWorkerOutputSchema` is inferred from the file's naming convention
  and is not visible in the patch itself - confirm against the repository.
  Leading indentation of hunk lines is likewise reconstructed - confirm, or
  apply with whitespace tolerance.
  NOTE(review): `transform` is added without `.optional()` or a default, so
  any other caller of this input schema would presumably have to send it too -
  verify there are none besides callBrowseFetchPage.

diff --git a/src/modules/browse/browse.client.ts b/src/modules/browse/browse.client.ts
index 8776177c8..785066b18 100644
--- a/src/modules/browse/browse.client.ts
+++ b/src/modules/browse/browse.client.ts
@@ -26,7 +26,10 @@ export async function callBrowseFetchPage(url: string) {
       dialect: 'browse-wss',
       ...(!!clientWssEndpoint && { wssEndpoint: clientWssEndpoint }),
     },
-    subjects: [{ url }],
+    subjects: [{
+      url,
+      transform: 'markdown',
+    }],
     screenshot: DEBUG_SHOW_SCREENSHOT ? {
       width: 512,
       height: 512,
diff --git a/src/modules/browse/browse.router.ts b/src/modules/browse/browse.router.ts
index 063391cb4..435eae49d 100644
--- a/src/modules/browse/browse.router.ts
+++ b/src/modules/browse/browse.router.ts
@@ -17,10 +17,14 @@ const browseAccessSchema = z.object({
   wssEndpoint: z.string().trim().optional(),
 });
 
+const pageTransformSchema = z.enum(['html', 'text', 'markdown']);
+type PageTransformSchema = z.infer<typeof pageTransformSchema>;
+
 const fetchPageInputSchema = z.object({
   access: browseAccessSchema,
   subjects: z.array(z.object({
     url: z.string().url(),
+    transform: pageTransformSchema,
   })),
   screenshot: z.object({
     width: z.number(),
@@ -60,7 +64,7 @@ export const browseRouter = createTRPCRouter({
 
       for (const subject of subjects) {
         try {
-          pages.push(await workerPuppeteer(access, subject.url, screenshot?.width, screenshot?.height, screenshot?.quality));
+          pages.push(await workerPuppeteer(access, subject.url, subject.transform, screenshot?.width, screenshot?.height, screenshot?.quality));
         } catch (error: any) {
           pages.push({
             url: subject.url,
@@ -80,7 +84,11 @@ export const browseRouter = createTRPCRouter({
 type BrowseAccessSchema = z.infer<typeof browseAccessSchema>;
 type FetchPageWorkerOutputSchema = z.infer<typeof fetchPageWorkerOutputSchema>;
 
-async function workerPuppeteer(access: BrowseAccessSchema, targetUrl: string, ssWidth: number | undefined, ssHeight: number | undefined, ssQuality: number | undefined): Promise<FetchPageWorkerOutputSchema> {
+
+/**
+ * Puppeteer implementation of the worker
+ */
+async function workerPuppeteer(access: BrowseAccessSchema, targetUrl: string, transform: PageTransformSchema, ssWidth: number | undefined, ssHeight: number | undefined, ssQuality: number | undefined): Promise<FetchPageWorkerOutputSchema> {
 
   // access
   const browserWSEndpoint = (access.wssEndpoint || env.PUPPETEER_WSS_ENDPOINT || '').trim();