From a183c26e516ac6802534e7b185305eae2fbfa745 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Fri, 17 May 2024 06:33:19 -0700
Subject: [PATCH] PDFUtils: improve PDF to image

---
 src/common/util/pdfUtils.ts | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/common/util/pdfUtils.ts b/src/common/util/pdfUtils.ts
index fce030487..a5165f1ac 100644
--- a/src/common/util/pdfUtils.ts
+++ b/src/common/util/pdfUtils.ts
@@ -48,15 +48,22 @@ export async function pdfToText(pdfBuffer: ArrayBuffer): Promise {
 }
 
 
-type PdfPageImage = { base64Url: string, scale: number, width: number, height: number };
+interface PdfPageImage {
+  mimeType: string;
+  base64Data: string;
+  scale: number;
+  width: number;
+  height: number;
+};
 
 /**
  * Renders all pages of a PDF to images
  *
  * @param pdfBuffer The content of a PDF file
+ * @param imageMimeType The MIME type of the image to render (default 'image/jpeg')
  * @param scale The scale factor for the image resolution (default 1.5 for moderate quality)
  */
-export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, scale = 1.5): Promise<PdfPageImage[]> {
+export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, imageMimeType = 'image/jpeg', scale = 1.5): Promise<PdfPageImage[]> {
   const { getDocument } = await dynamicImportPdfJs();
   const pdf = await getDocument({ data: pdfBuffer }).promise;
   const images: PdfPageImage[] = [];
@@ -74,8 +81,12 @@ export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, scale = 1.5): P
       viewport,
     }).promise;
 
+    const base64DataUrl = canvas.toDataURL(imageMimeType, 0.95);
+    const base64Data = base64DataUrl.slice(`data:${imageMimeType};base64,`.length);
+
     images.push({
-      base64Url: canvas.toDataURL('image/jpeg'),
+      mimeType: imageMimeType,
+      base64Data,
       scale,
       width: viewport.width,
       height: viewport.height,