PDFUtils: improve PDF to image

This commit is contained in:
Enrico Ros
2024-05-17 06:33:19 -07:00
parent 01a03d164c
commit a183c26e51
+14 -3
View File
@@ -48,15 +48,22 @@ export async function pdfToText(pdfBuffer: ArrayBuffer): Promise<string> {
}
type PdfPageImage = { base64Url: string, scale: number, width: number, height: number };
/**
 * One rendered PDF page, as returned (one entry per page) by `pdfToImageDataURLs`.
 */
interface PdfPageImage {
  /** MIME type that was requested when encoding the page image (e.g. 'image/jpeg'). */
  mimeType: string;

  /**
   * Base64 payload of the page image: the canvas data URL with its leading
   * `data:<mimeType>;base64,` header cut off.
   * NOTE(review): the header is removed by length, based on the requested MIME type;
   * confirm what happens if the canvas encodes to a different format than requested.
   */
  base64Data: string;

  /** Scale factor the page was rendered with. */
  scale: number;

  /** Width of the viewport the page was rendered into. */
  width: number;

  /** Height of the viewport the page was rendered into. */
  height: number;
}
/**
* Renders all pages of a PDF to images
*
* @param pdfBuffer The content of a PDF file
* @param imageMimeType The MIME type of the image to render (default 'image/jpeg')
* @param scale The scale factor for the image resolution (default 1.5 for moderate quality)
*/
export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, scale = 1.5): Promise<PdfPageImage[]> {
export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, imageMimeType = 'image/jpeg', scale = 1.5): Promise<PdfPageImage[]> {
const { getDocument } = await dynamicImportPdfJs();
const pdf = await getDocument({ data: pdfBuffer }).promise;
const images: PdfPageImage[] = [];
@@ -74,8 +81,12 @@ export async function pdfToImageDataURLs(pdfBuffer: ArrayBuffer, scale = 1.5): P
viewport,
}).promise;
const base64DataUrl = canvas.toDataURL(imageMimeType, 0.95);
const base64Data = base64DataUrl.slice(`data:${imageMimeType};base64,`.length);
images.push({
base64Url: canvas.toDataURL('image/jpeg'),
mimeType: imageMimeType,
base64Data,
scale,
width: viewport.width,
height: viewport.height,