diff --git a/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx b/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx
index e2235c50c..ccc6b43ec 100644
--- a/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx
+++ b/src/apps/chat/components/composer/llmattachments/LLMAttachmentButton.tsx
@@ -1,7 +1,7 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
-import { Box, Button, CircularProgress, ColorPaletteProp, Sheet, Typography, VariantProp } from '@mui/joy';
+import { Box, Button, CircularProgress, ColorPaletteProp, ListItem, Sheet, Typography, VariantProp } from '@mui/joy';
import AbcIcon from '@mui/icons-material/Abc';
import CodeIcon from '@mui/icons-material/Code';
import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
@@ -100,8 +100,10 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com
'image-to-default': ImageOutlinedIcon,
'image-caption': AbcIcon,
'image-ocr': AbcIcon,
+ 'pdf-auto': PictureAsPdfIcon,
'pdf-text': PictureAsPdfIcon,
'pdf-images': PermMediaOutlinedIcon,
+ 'pdf-images-ocr': AbcIcon,
'pdf-text-and-images': PermMediaOutlinedIcon,
'docx-to-html': DescriptionOutlinedIcon,
'url-page-text': TextFieldsIcon, // was LanguageIcon
@@ -228,9 +230,10 @@ function LLMAttachmentButton(props: {
const isUnconvertible = !draft.converters.length;
const isOutputLoading = draft.outputsConverting;
const isOutputMissing = !draft.outputFragments.length;
+ const isOutputWarned = !!draft.outputWarnings?.length;
const hasLiveFiles = draft.outputFragments.some(_f => _f.liveFileId);
- const showWarning = isUnconvertible || (isOutputMissing || !llmSupportsAllFragments);
+ const showWarning = isUnconvertible || (isOutputMissing || !llmSupportsAllFragments) || isOutputWarned;
// handlers
diff --git a/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx b/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx
index 1eae95dde..a761cf742 100644
--- a/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx
+++ b/src/apps/chat/components/composer/llmattachments/LLMAttachmentMenu.tsx
@@ -1,16 +1,15 @@
import * as React from 'react';
import type { SxProps } from '@mui/joy/styles/types';
-import { Box, Checkbox, Chip, CircularProgress, LinearProgress, ListDivider, ListItem, ListItemDecorator, MenuItem, Radio, Typography } from '@mui/joy';
-import AttachmentIcon from '@mui/icons-material/Attachment';
+import { Box, Button, ButtonGroup, Checkbox, Chip, CircularProgress, Divider, LinearProgress, ListDivider, ListItem, ListItemDecorator, MenuItem, Radio, Typography } from '@mui/joy';
import ClearIcon from '@mui/icons-material/Clear';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
-import DeleteForeverIcon from '@mui/icons-material/DeleteForever';
+import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
import ExpandLessIcon from '@mui/icons-material/ExpandLess';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
+import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown';
import KeyboardArrowLeftIcon from '@mui/icons-material/KeyboardArrowLeft';
import KeyboardArrowRightIcon from '@mui/icons-material/KeyboardArrowRight';
-import ReadMoreIcon from '@mui/icons-material/ReadMore';
import VerticalAlignBottomIcon from '@mui/icons-material/VerticalAlignBottom';
import VisibilityIcon from '@mui/icons-material/Visibility';
@@ -18,6 +17,7 @@ import { CloseablePopup } from '~/common/components/CloseablePopup';
import { DMessageAttachmentFragment, DMessageDocPart, DMessageImageRefPart, isDocPart, isImageRefPart, isZyncAssetImageReferencePartWithLegacyDBlob } from '~/common/stores/chat/chat.fragments';
import { LiveFileIcon } from '~/common/livefile/liveFile.icons';
import { copyToClipboard } from '~/common/util/clipboardUtils';
+import { humanReadableBytes } from '~/common/util/textUtils';
import { themeZIndexOverMobileDrawer } from '~/common/app.theme';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
@@ -32,12 +32,20 @@ const DEFAULT_DETAILS_OPEN = true;
const SHOW_INLINING_OPERATIONS = false;
-const indicatorSx = {
- fontSize: '1rem',
-} as const;
+// const indicatorSx = {
+// fontSize: '1rem',
+// } as const;
+//
+// const indicatorGapSx: SxProps = {
+// paddingLeft: '1.375rem',
+// };
-const indicatorGapSx: SxProps = {
- paddingLeft: '1.375rem',
+const actionButtonsSx: SxProps = {
+ ml: 'auto',
+ minHeight: 0,
+ borderRadius: '1rem',
+ backgroundColor: 'background.surface',
+ '& button': { fontSize: 'xs', fontWeight: 'md', py: 0, minWidth: 0, minHeight: 0 },
};
@@ -82,9 +90,10 @@ export function LLMAttachmentMenu(props: {
const isUnconvertible = !draft.converters.length;
const isOutputMissing = !draft.outputFragments.length;
const isOutputMultiple = draft.outputFragments.length > 1;
+ const isOutputWarned = !!draft.outputWarnings?.length;
const hasLiveFiles = draft.outputFragments.some(_f => _f.liveFileId);
- const showWarning = isUnconvertible || isOutputMissing || !llmSupportsAllFragments;
+ const showWarning = isUnconvertible || isOutputMissing || !llmSupportsAllFragments || isOutputWarned;
// hooks
@@ -197,6 +206,17 @@ export function LLMAttachmentMenu(props: {
)}
)}
+ {/* Auto-heuristics message, with explanation */}
+ {!!draft.outputsHeuristic?.isAuto && (
+
+ {draft.outputsHeuristic.isAuto ? 'Auto: ' : ''}
+ {draft.outputsHeuristic.actualConverterId === 'pdf-text' && 'Text'}
+ {draft.outputsHeuristic.actualConverterId === 'pdf-images-ocr' && 'OCR'}
+ {draft.outputsHeuristic.actualConverterId === 'pdf-images' && 'Images'}
+ {draft.outputsHeuristic.actualConverterId === 'pdf-text-and-images' && 'Text + Images'}
+ {draft.outputsHeuristic.explain && ` (${draft.outputsHeuristic.explain})`}
+
+ )}
{!isUnconvertible && draft.converters.map((c, idx) =>
,
)}
{/*{!isUnconvertible && }*/}
- {/* Auto-fallback notice (e.g., PDF with low text converted to images) */}
- {draft.conversionFallback && (
-
- Auto: {draft.conversionFallback.reason}
-
- )}
-
{/* Progress indicator (mainly for OCRs of Images, PDFs, and PDF to Images) */}
{!!draft.outputsConversionProgress && draft.outputsConversionProgress < 1 && (
@@ -268,11 +283,19 @@ export function LLMAttachmentMenu(props: {
{isInputError ? 'Loading Issue' : 'Warning'}
+
+ {/* Only show 1 warning, excluding lower priorities */}
{isInputError ?
{draft.inputError}
: isUnconvertible ? Attachments of type {draft.input?.mimeType} are not supported yet. You can request this on GitHub.
: isOutputMissing ? File not supported. Please try another format.
: !llmSupportsAllFragments ? May not be compatible with the current model. Please try another format.
- : <>Unknown warning>}
+ : draft.outputWarnings?.length ? '' /* printed below */
+ : <>Unknown warning>}
+
+ {/* Explicit output warnings */}
+ {!!draft.outputWarnings?.length && draft.outputWarnings.map((w, widx) =>
+ ⚠️ {w})
+ }
@@ -301,24 +324,24 @@ export function LLMAttachmentMenu(props: {
Details
) : (
-
+
{/* <- inputs */}
{showInputs && !!draftInput && (
- }>
- {draftInput.mimeType}{typeof draftInput.dataSize === 'number' ? ` · ${draftInput.dataSize.toLocaleString()} bytes` : ''}
+
+ Input: {draftInput.mimeType}{typeof draftInput.dataSize === 'number' ? ` · ${humanReadableBytes(draftInput.dataSize)}` : ''}
)}
{showInputs && !!draftInput?.altMimeType && (
-
- {draftInput.altMimeType} · {draftInput.altData?.length.toLocaleString()}
+
+ Input: {draftInput.altMimeType}{!draftInput.altData?.length ? '' : ` · ${humanReadableBytes(draftInput.altData.length)}`}
)}
{showInputs && !!draftInput?.urlImage && (
-
- {draftInput.urlImage.mimeType} · {draftInput.urlImage.width} x {draftInput.urlImage.height} · {draftInput.urlImage.imgDataUrl?.length.toLocaleString()}
- {' · '}
- } onClick={(event) => {
+
+ Input: {draftInput.urlImage.mimeType} · {draftInput.urlImage.width}x{draftInput.urlImage.height}{!draftInput.urlImage.imgDataUrl?.length ? '' : ` · ${humanReadableBytes(draftInput.urlImage.imgDataUrl.length)}`}
+
+ } onClick={(event) => {
if (draftInput?.urlImage?.imgDataUrl) {
// Invoke the viewer but with a virtual 'temp' part description to see this preview image
handleViewImageRefPart(event, {
@@ -332,8 +355,8 @@ export function LLMAttachmentMenu(props: {
height: draftInput.urlImage.height || undefined,
});
}
- }}>
- view
+ }} sx={{ ml: 'auto' }}>
+ view input
)}
@@ -342,45 +365,79 @@ export function LLMAttachmentMenu(props: {
{/* Converters: {draft.converters.map(((converter, idx) => ` ${converter.id}${converter.isActive ? '*' : ''}`)).join(', ')}*/}
{/**/}
+ {/* Downward arrow */}
+
+
+
+
{/* -> Outputs */}
-
+
{isOutputMissing ? (
- }>...
+ {isConverting ? '...' : '... nothing ...'}
) : (
draft.outputFragments.map(({ part }, index) => {
if (isDocPart(part)) {
return (
- }>
- {part.data.mimeType /* part.type: big-agi type, not source mime */} · {part.data.text.length.toLocaleString()} bytes ·
- } onClick={(event) => handleViewDocPart(event, part)}>
- view
-
- } onClick={(event) => handleCopyToClipboard(event, part.data.text)}>
- copy
-
+
+ {part.data.mimeType /* part.type: big-agi type, not source mime */} · {humanReadableBytes(part.data.text.length)}
+ {/*} onClick={(event) => handleViewDocPart(event, part)} sx={{ ml: 'auto' }}>*/}
+ {/* view*/}
+ {/**/}
+ {/*} onClick={(event) => handleCopyToClipboard(event, part.data.text)}>*/}
+ {/* copy*/}
+ {/**/}
+
+ } onClick={(event) => handleViewDocPart(event, part)}>
+ view
+
+
+
);
} else if (isZyncAssetImageReferencePartWithLegacyDBlob(part) || isImageRefPart(part)) {
// Unified Image Reference handling (both Zync Asset References with legacy fallback and legacy image_ref)
const legacyImageRefPart = isZyncAssetImageReferencePartWithLegacyDBlob(part) ? part._legacyImageRefPart! : part;
const { dataRef, width, height } = legacyImageRefPart;
- const resolution = width && height ? `${width} x ${height}` : 'no resolution';
+ const resolution = width && height ? `${width}x${height}` : 'no resolution';
const mime = dataRef.reftype === 'dblob' ? dataRef.mimeType : 'unknown image';
return (
- }>
- {mime /*.replace('image/', 'img: ')*/} · {resolution} · {dataRef.reftype === 'dblob' ? (dataRef.bytesSize?.toLocaleString() || 'no size') : '(remote)'} ·
- }
- onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}>
- view
-
- {isOutputMultiple && } onClick={(event) => handleDeleteOutputFragment(event, index)}>
- del
- }
+
+ {mime /*.replace('image/', 'img: ')*/} · {resolution} · {
+ dataRef.reftype !== 'dblob' ? '(remote)'
+ : !dataRef.bytesSize ? 'no size'
+ : humanReadableBytes(dataRef.bytesSize)}
+ {/*}*/}
+ {/* onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}>*/}
+ {/* view*/}
+ {/**/}
+ {/*{isOutputMultiple && } onClick={(event) => handleDeleteOutputFragment(event, index)}>*/}
+ {/* del*/}
+ {/*}*/}
+
+ }
+ onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}
+ >
+ view
+
+ {isOutputMultiple && (
+ }
+ onClick={(event) => handleDeleteOutputFragment(event, index)}
+ // sx={{ width: 48 }}
+ >
+ del
+
+ )}
+
);
} else {
return (
- }>
+
{(part as DMessageAttachmentFragment['part']).pt}: (other)
);
@@ -388,8 +445,8 @@ export function LLMAttachmentMenu(props: {
})
)}
{!!llmTokenCountApprox && (
-
- ~{llmTokenCountApprox.toLocaleString()} tokens
+
+ ~ {llmTokenCountApprox.toLocaleString()} tokens
)}
diff --git a/src/common/attachment-drafts/attachment.pipeline.ts b/src/common/attachment-drafts/attachment.pipeline.ts
index d4a670c51..2959469db 100644
--- a/src/common/attachment-drafts/attachment.pipeline.ts
+++ b/src/common/attachment-drafts/attachment.pipeline.ts
@@ -11,6 +11,7 @@ import { convert_Base64DataURL_To_Base64WithMimeType, convert_Base64WithMimeType
import { getDomainModelConfiguration } from '~/common/stores/llms/hooks/useModelDomain';
import { htmlTableToMarkdown } from '~/common/util/htmlTableToMarkdown';
import { humanReadableHyphenated } from '~/common/util/textUtils';
+import { ocrImageWithProgress, ocrPdfPagesWithProgress } from '~/common/util/ocrUtils';
import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils';
import { createDMessageDataInlineText, createDocAttachmentFragment, DMessageAttachmentFragment, DMessageDataInline, DMessageDocPart, DVMimeType, isContentOrAttachmentFragment, isDocPart, specialContentPartToDocAttachmentFragment } from '~/common/stores/chat/chat.fragments';
@@ -28,7 +29,8 @@ const ENABLE_TEXT_AND_IMAGES = false; // [PROD] ?
const DOCPART_DEFAULT_VERSION = 1;
// PDF text extraction quality thresholds
-const PDF_LOW_TEXT_THRESHOLD = 100; // chars per page - below this, consider the PDF as scanned/image-based
+const IMAGE_LOW_TEXT_THRESHOLD = 80; // chars per image - below this, consider the image as low-text (photo-like) rather than document-like
+const PDF_LOW_TEXT_THRESHOLD = 160; // chars per page - below this, consider the PDF as scanned/image-based
const PDF_FALLBACK_MAX_IMAGES = 32; // max pages to convert to images when auto-falling back (to respect LLM limits)
@@ -288,16 +290,18 @@ export function attachmentDefineConverters(source: AttachmentDraftSource, input:
converters.push({ id: 'image-original', name: 'Image (original quality)', disabled: !inputImageMimeSupported });
if (!inputImageMimeSupported)
converters.push({ id: 'image-to-default', name: `As Image (${PLATFORM_IMAGE_MIMETYPE})` });
- converters.push({ id: 'image-caption', name: 'Caption (Text)', disabled: visionModelMissing });
+ converters.push({ id: 'image-caption', name: 'AI Caption (Text)', disabled: visionModelMissing });
converters.push({ id: 'unhandled', name: 'No Image' });
converters.push({ id: 'image-ocr', name: 'Add Text (OCR)', isCheckbox: true });
break;
// PDF
case mimeTypeIsPDF(input.mimeType):
- converters.push({ id: 'pdf-text', name: 'PDF To Text', isActive: !autoAddImages || undefined });
- converters.push({ id: 'pdf-images', name: 'PDF To Images' });
- converters.push({ id: 'pdf-text-and-images', name: 'PDF Text & Images (best)', isActive: autoAddImages });
+ converters.push({ id: 'pdf-auto', name: 'Auto', isActive: !autoAddImages });
+ converters.push({ id: 'pdf-text', name: 'PDF Text' });
+ converters.push({ id: 'pdf-images-ocr', name: 'PDF -> OCR (for scans)' });
+ converters.push({ id: 'pdf-images', name: 'PDF -> Images' });
+ converters.push({ id: 'pdf-text-and-images', name: 'PDF -> Text + Images', isActive: autoAddImages });
break;
// DOCX
@@ -483,6 +487,8 @@ export async function attachmentPerformConversion(
edit(attachment.id, {
outputsConverting: true,
outputsConversionProgress: null,
+ outputWarnings: undefined,
+ outputsHeuristic: undefined,
});
// apply converter to the input
@@ -575,23 +581,14 @@ export async function attachmentPerformConversion(
case 'image-ocr':
if (!_expectBlob(input.data, 'Image OCR converter')) break;
try {
- let lastProgress = -1;
- const { recognize } = await import('tesseract.js');
- const result = await recognize(input.data, undefined, {
- errorHandler: e => console.error(e),
- logger: (message) => {
- if (message.status === 'recognizing text') {
- if (message.progress > lastProgress + 0.01) {
- lastProgress = message.progress;
- edit(attachment.id, { outputsConversionProgress: lastProgress });
- }
- }
- },
- });
- const imageText = result.data.text;
+ // Image -> OCR -> Inline text doc
+ const imageText = await ocrImageWithProgress(input.data, (progress) => edit(attachment.id, { outputsConversionProgress: progress }));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(imageText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'image' }));
+ // warn if very little text was extracted (likely a photo/diagram rather than text)
+ if (imageText.trim().length < IMAGE_LOW_TEXT_THRESHOLD)
+ edit(attachment.id, { outputWarnings: ['Very little text extracted - this image may not contain readable text.'] });
} catch (error) {
- console.error(error);
+ console.error('[Image OCR Error]', error);
}
break;
@@ -620,65 +617,111 @@ export async function attachmentPerformConversion(
} catch (error: any) {
console.log('[DEV] Failed to caption image:', error);
const errorText = `[Captioning failed: ${error?.message || String(error)}]`;
+ edit(attachment.id, { outputWarnings: [errorText] });
newFragments.push(createDocAttachmentFragment(title, caption + ' (Error)', DVMimeType.TextPlain, createDMessageDataInlineText(errorText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'image-caption' }));
}
break;
- // pdf to text (with auto-fallback to images for scanned/image-based PDFs)
- case 'pdf-text':
- if (!_expectBlob(input.data, 'PDF text converter')) break;
+ // pdf-auto: intelligent conversion with fallback chain (text → OCR → images)
+ case 'pdf-auto':
+ if (!_expectBlob(input.data, 'PDF auto converter')) break;
try {
- // Convert Blob to ArrayBuffer for PDF.js
+ // Phase 1: Try text extraction (0-20% progress)
const pdfArrayBuffer = await input.data.arrayBuffer();
- // Extract text with quality metadata
+ // [pdf-text] Extract text with quality metadata
const pdfTextResult = await pdfToText(pdfArrayBuffer, (progress: number) => {
- // Reserve 0-30% for text extraction attempt, 30-100% for potential image fallback
- edit(attachment.id, { outputsConversionProgress: progress * 0.3 });
+ // Reserve 0-20% for the text extraction attempt; if it falls through, 20-40% is page rendering and 40-90% is OCR
+ edit(attachment.id, { outputsConversionProgress: progress * 0.2 });
});
// Check text density to detect scanned/image-based PDFs
if (pdfTextResult.avgCharsPerPage >= PDF_LOW_TEXT_THRESHOLD) {
// Good text extraction - use it
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-text', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page` },
+ });
} else {
- // Low text density detected - auto-fallback to images
- console.log(`[PDF] Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page across ${pdfTextResult.pageCount} pages), falling back to images`);
+ // Low text density - try OCR
+ // console.log(`[PDF Auto] Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page), trying OCR...`);
- // Need fresh ArrayBuffer for image rendering (previous one may be consumed)
+ // [pdf-images] Phase 2: Render pages to images (20-40% progress)
const pdfArrayBufferForImages = await input.data.arrayBuffer();
const imageDataURLs = await pdfToImageDataURLs(pdfArrayBufferForImages, PLATFORM_IMAGE_MIMETYPE, PDF_IMAGE_QUALITY, PDF_IMAGE_PAGE_SCALE, (progress) => {
- edit(attachment.id, { outputsConversionProgress: 0.3 + progress * 0.7 }); // 30-100%
+ edit(attachment.id, { outputsConversionProgress: 0.2 + progress * 0.2 });
});
- // Limit pages to respect LLM image limits
- const pagesToAttach = Math.min(imageDataURLs.length, PDF_FALLBACK_MAX_IMAGES);
- for (let i = 0; i < pagesToAttach; i++) {
- const pdfPageImage = imageDataURLs[i];
- const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
- if (pdfPageImageF)
- newFragments.push(pdfPageImageF);
+ // Limit pages for OCR (performance) and for the images fallback below, which reuses the same page count
+ const pagesToProcess = Math.min(imageDataURLs.length, PDF_FALLBACK_MAX_IMAGES);
+ const imagesToOcr = imageDataURLs.slice(0, pagesToProcess);
+
+ // Phase 3: Try OCR on rendered pages (40-90% progress)
+ try {
+ // [pdf-images-ocr] OCR the images
+ const ocrResult = await ocrPdfPagesWithProgress(imagesToOcr, (progress) => {
+ edit(attachment.id, { outputsConversionProgress: 0.4 + progress * 0.5 });
+ });
+
+ if (ocrResult.avgCharsPerPage >= PDF_LOW_TEXT_THRESHOLD) {
+ // OCR yielded good text - use it
+ newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(ocrResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
+ const truncNote = pdfTextResult.pageCount > pagesToProcess ? ` (${pagesToProcess}/${pdfTextResult.pageCount} pages)` : '';
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images-ocr', explain: /*OCR extracted */`${ocrResult.avgCharsPerPage.toFixed(0)} chars/page${truncNote}` },
+ });
+ } else {
+ // OCR also yielded poor results - fall back to images
+ // console.log(`[PDF Auto] OCR also sparse (${ocrResult.avgCharsPerPage.toFixed(0)} chars/page), falling back to images`);
+ for (let i = 0; i < pagesToProcess; i++) {
+ const pdfPageImage = imageDataURLs[i];
+ const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
+ if (pdfPageImageF)
+ newFragments.push(pdfPageImageF);
+ }
+ const truncNote = pdfTextResult.pageCount > pagesToProcess ? ` (${pagesToProcess}/${pdfTextResult.pageCount} pages)` : '';
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images', explain: `not a text page${truncNote}` },
+ });
+ }
+ } catch (ocrError) {
+ // OCR failed - fall back to images
+ console.warn('[PDF Auto] OCR failed, falling back to images:', ocrError);
+ for (let i = 0; i < pagesToProcess; i++) {
+ const pdfPageImage = imageDataURLs[i];
+ const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
+ if (pdfPageImageF)
+ newFragments.push(pdfPageImageF);
+ }
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images', explain: 'OCR failed, attached as images' },
+ });
}
-
- // Set fallback info for UI display
- const truncatedNote = pdfTextResult.pageCount > PDF_FALLBACK_MAX_IMAGES
- ? ` (first ${pagesToAttach} of ${pdfTextResult.pageCount} pages)`
- : '';
- edit(attachment.id, {
- conversionFallback: {
- from: 'pdf-text',
- to: 'pdf-images',
- reason: `Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page) - converted to images${truncatedNote}`,
- },
- });
}
+ } catch (error) {
+ console.error('Error in PDF auto conversion:', error);
+ }
+ break;
+
+ // pdf-text: strict text extraction, no fallback (honors user choice)
+ case 'pdf-text':
+ if (!_expectBlob(input.data, 'PDF text converter')) break;
+ try {
+ const pdfTextResult = await pdfToText(await input.data.arrayBuffer(), progress => edit(attachment.id, { outputsConversionProgress: progress }));
+ // Always output text, even if sparse (user explicitly chose this)
+ newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
+ edit(attachment.id, {
+ // warn if very little text was extracted (likely a scanned PDF)
+ outputWarnings: pdfTextResult.avgCharsPerPage >= 20 ? undefined : ['Very little text extracted - this PDF may be scanned. Try "Auto" or "OCR (for scans)" mode.'],
+ outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-text', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page` },
+ });
} catch (error) {
console.error('Error in PDF text extraction:', error);
}
break;
- // pdf to images
+ // pdf-images: render all pages as images (honors user choice)
case 'pdf-images':
if (!_expectBlob(input.data, 'PDF images converter')) break;
// Convert Blob to ArrayBuffer for PDF.js
@@ -691,11 +734,39 @@ export async function attachmentPerformConversion(
if (pdfPageImageF)
newFragments.push(pdfPageImageF);
}
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-images', explain: `${imageDataURLs.length} pages` },
+ });
} catch (error) {
console.error('Error converting PDF to images:', error);
}
break;
+ // pdf-images-ocr: force OCR on all pages (for scanned documents)
+ case 'pdf-images-ocr':
+ if (!_expectBlob(input.data, 'PDF OCR converter')) break;
+ try {
+ // Render pages to images (0-40% progress)
+ const imageDataURLs = await pdfToImageDataURLs(await input.data.arrayBuffer(), PLATFORM_IMAGE_MIMETYPE, PDF_IMAGE_QUALITY, PDF_IMAGE_PAGE_SCALE, (progress) => {
+ edit(attachment.id, { outputsConversionProgress: progress * 0.4 });
+ });
+
+ // OCR all pages (40-100% progress)
+ const ocrResult = await ocrPdfPagesWithProgress(imageDataURLs, (progress) => {
+ edit(attachment.id, { outputsConversionProgress: 0.4 + progress * 0.6 });
+ });
+
+ newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(ocrResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
+ edit(attachment.id, {
+ // warn if OCR recovered very little text (the pages are likely mostly images/diagrams)
+ outputWarnings: ocrResult.avgCharsPerPage >= 20 ? undefined : ['Very little text extracted via OCR - this PDF may contain mostly images/diagrams.'],
+ outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-images-ocr', explain: `${ocrResult.avgCharsPerPage.toFixed(0)} chars/page from ${ocrResult.pageCount} pages` },
+ });
+ } catch (error) {
+ console.error('Error in PDF OCR:', error);
+ }
+ break;
+
// pdf to text and images
case 'pdf-text-and-images':
if (!_expectBlob(input.data, 'PDF text and images converter')) break;
@@ -725,10 +796,13 @@ export async function attachmentPerformConversion(
const textFragment = createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' });
newFragments.push(textFragment);
}
- // Note: if text is sparse, images are still attached (user explicitly chose text+images), so we don't consider density here
+ // Note: if text is sparse, images are still attached (user explicitly chose text+images)
// Add the text fragment first, then the image fragments
newFragments.push(...imageFragments);
+ edit(attachment.id, {
+ outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-text-and-images', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page + ${imageFragments.length} images` },
+ });
} catch (error) {
console.error('Error converting PDF to text and images:', error);
}
diff --git a/src/common/attachment-drafts/attachment.types.ts b/src/common/attachment-drafts/attachment.types.ts
index c5d1c68d9..b2fcafcc3 100644
--- a/src/common/attachment-drafts/attachment.types.ts
+++ b/src/common/attachment-drafts/attachment.types.ts
@@ -24,11 +24,14 @@ export type AttachmentDraft = {
outputsConversionProgress: number | null;
outputFragments: DMessageAttachmentFragment[];
- // Auto-fallback info: set when a converter auto-switches due to quality issues (e.g., PDF with low text density)
- conversionFallback?: {
- from: AttachmentDraftConverterType;
- to: AttachmentDraftConverterType;
- reason: string;
+ // Warnings for poor conversions (e.g. scanned PDF with text extraction rather than OCR)
+ outputWarnings?: string[];
+
+ // Tracks what method was actually used (especially for Auto mode)
+ outputsHeuristic?: {
+ isAuto: boolean;
+ actualConverterId: AttachmentDraftConverterType;
+ explain?: string; // e.g., "42 chars/page detected"
};
// metadata: {
@@ -144,7 +147,7 @@ export type AttachmentDraftConverter = {
export type AttachmentDraftConverterType =
| 'text' | 'rich-text' | 'rich-text-cleaner' | 'rich-text-table'
| 'image-original' | 'image-resized-high' | 'image-resized-low' | 'image-ocr' | 'image-caption' | 'image-to-default'
- | 'pdf-text' | 'pdf-images' | 'pdf-text-and-images'
+ | 'pdf-auto' | 'pdf-text' | 'pdf-images' | 'pdf-images-ocr' | 'pdf-text-and-images'
| 'docx-to-html'
| 'url-page-text' | 'url-page-markdown' | 'url-page-html' | 'url-page-null' | 'url-page-image'
| 'youtube-transcript' | 'youtube-transcript-simple'