Attachments: PDF: add a PDF->OCR converter (via interim page images) and an 'Auto' mode (the new default)

The Auto mode tries plain Text extraction first; if the text is too sparse it renders the pages to Images and runs OCR on them, and finally falls back to attaching the pure images.
This commit is contained in:
Enrico Ros
2026-01-14 15:09:40 -08:00
parent 7aa9cb07b2
commit 88d39345a5
4 changed files with 250 additions and 113 deletions
@@ -1,7 +1,7 @@
import * as React from 'react';
import TimeAgo from 'react-timeago';
import { Box, Button, CircularProgress, ColorPaletteProp, Sheet, Typography, VariantProp } from '@mui/joy';
import { Box, Button, CircularProgress, ColorPaletteProp, ListItem, Sheet, Typography, VariantProp } from '@mui/joy';
import AbcIcon from '@mui/icons-material/Abc';
import CodeIcon from '@mui/icons-material/Code';
import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
@@ -100,8 +100,10 @@ const converterTypeToIconMap: { [key in AttachmentDraftConverterType]: React.Com
'image-to-default': ImageOutlinedIcon,
'image-caption': AbcIcon,
'image-ocr': AbcIcon,
'pdf-auto': PictureAsPdfIcon,
'pdf-text': PictureAsPdfIcon,
'pdf-images': PermMediaOutlinedIcon,
'pdf-images-ocr': AbcIcon,
'pdf-text-and-images': PermMediaOutlinedIcon,
'docx-to-html': DescriptionOutlinedIcon,
'url-page-text': TextFieldsIcon, // was LanguageIcon
@@ -228,9 +230,10 @@ function LLMAttachmentButton(props: {
const isUnconvertible = !draft.converters.length;
const isOutputLoading = draft.outputsConverting;
const isOutputMissing = !draft.outputFragments.length;
const isOutputWarned = !!draft.outputWarnings?.length;
const hasLiveFiles = draft.outputFragments.some(_f => _f.liveFileId);
const showWarning = isUnconvertible || (isOutputMissing || !llmSupportsAllFragments);
const showWarning = isUnconvertible || (isOutputMissing || !llmSupportsAllFragments) || isOutputWarned;
// handlers
@@ -1,16 +1,15 @@
import * as React from 'react';
import type { SxProps } from '@mui/joy/styles/types';
import { Box, Checkbox, Chip, CircularProgress, LinearProgress, ListDivider, ListItem, ListItemDecorator, MenuItem, Radio, Typography } from '@mui/joy';
import AttachmentIcon from '@mui/icons-material/Attachment';
import { Box, Button, ButtonGroup, Checkbox, Chip, CircularProgress, Divider, LinearProgress, ListDivider, ListItem, ListItemDecorator, MenuItem, Radio, Typography } from '@mui/joy';
import ClearIcon from '@mui/icons-material/Clear';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import DeleteForeverIcon from '@mui/icons-material/DeleteForever';
import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
import ExpandLessIcon from '@mui/icons-material/ExpandLess';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown';
import KeyboardArrowLeftIcon from '@mui/icons-material/KeyboardArrowLeft';
import KeyboardArrowRightIcon from '@mui/icons-material/KeyboardArrowRight';
import ReadMoreIcon from '@mui/icons-material/ReadMore';
import VerticalAlignBottomIcon from '@mui/icons-material/VerticalAlignBottom';
import VisibilityIcon from '@mui/icons-material/Visibility';
@@ -18,6 +17,7 @@ import { CloseablePopup } from '~/common/components/CloseablePopup';
import { DMessageAttachmentFragment, DMessageDocPart, DMessageImageRefPart, isDocPart, isImageRefPart, isZyncAssetImageReferencePartWithLegacyDBlob } from '~/common/stores/chat/chat.fragments';
import { LiveFileIcon } from '~/common/livefile/liveFile.icons';
import { copyToClipboard } from '~/common/util/clipboardUtils';
import { humanReadableBytes } from '~/common/util/textUtils';
import { themeZIndexOverMobileDrawer } from '~/common/app.theme';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
@@ -32,12 +32,20 @@ const DEFAULT_DETAILS_OPEN = true;
const SHOW_INLINING_OPERATIONS = false;
const indicatorSx = {
fontSize: '1rem',
} as const;
// const indicatorSx = {
// fontSize: '1rem',
// } as const;
//
// const indicatorGapSx: SxProps = {
// paddingLeft: '1.375rem',
// };
const indicatorGapSx: SxProps = {
paddingLeft: '1.375rem',
const actionButtonsSx: SxProps = {
ml: 'auto',
minHeight: 0,
borderRadius: '1rem',
backgroundColor: 'background.surface',
'& button': { fontSize: 'xs', fontWeight: 'md', py: 0, minWidth: 0, minHeight: 0 },
};
@@ -82,9 +90,10 @@ export function LLMAttachmentMenu(props: {
const isUnconvertible = !draft.converters.length;
const isOutputMissing = !draft.outputFragments.length;
const isOutputMultiple = draft.outputFragments.length > 1;
const isOutputWarned = !!draft.outputWarnings?.length;
const hasLiveFiles = draft.outputFragments.some(_f => _f.liveFileId);
const showWarning = isUnconvertible || isOutputMissing || !llmSupportsAllFragments;
const showWarning = isUnconvertible || isOutputMissing || !llmSupportsAllFragments || isOutputWarned;
// hooks
@@ -197,6 +206,17 @@ export function LLMAttachmentMenu(props: {
)}
</ListItem>
)}
{/* Auto-heuristics message, with explanation */}
{!!draft.outputsHeuristic?.isAuto && (
<ListItem color={draft.outputsHeuristic.isAuto ? 'primary' : undefined} sx={{ fontSize: 'sm', fontWeight: 'lg', mb: 0.5 }}>
{draft.outputsHeuristic.isAuto ? 'Auto: ' : ''}
{draft.outputsHeuristic.actualConverterId === 'pdf-text' && 'Text'}
{draft.outputsHeuristic.actualConverterId === 'pdf-images-ocr' && 'OCR'}
{draft.outputsHeuristic.actualConverterId === 'pdf-images' && 'Images'}
{draft.outputsHeuristic.actualConverterId === 'pdf-text-and-images' && 'Text + Images'}
{draft.outputsHeuristic.explain && ` (${draft.outputsHeuristic.explain})`}
</ListItem>
)}
{!isUnconvertible && draft.converters.map((c, idx) =>
<MenuItem
disabled={c.disabled || isConverting}
@@ -213,18 +233,13 @@ export function LLMAttachmentMenu(props: {
</ListItemDecorator>
{c.unsupported
? <Box>Unsupported 🤔 <Typography level='body-xs'>{c.name}</Typography></Box>
: c.name}
: (/* auto-converted */ draft.outputsHeuristic?.isAuto && c.id === draft.outputsHeuristic.actualConverterId)
? <Box component='span' sx={{ fontWeight: 'lg', color: 'primary.softColor' }}>{c.name}</Box>
: c.name}
</MenuItem>,
)}
{/*{!isUnconvertible && <ListDivider sx={{ mb: 0 }} />}*/}
{/* Auto-fallback notice (e.g., PDF with low text converted to images) */}
{draft.conversionFallback && (
<ListItem sx={{ fontSize: 'sm', color: 'success.softColor', fontStyle: 'italic', py: 0.5, px: 2 }}>
Auto: {draft.conversionFallback.reason}
</ListItem>
)}
{/* Progress indicator (mainly for OCRs of Images, PDFs, and PDF to Images) */}
{!!draft.outputsConversionProgress && draft.outputsConversionProgress < 1 && (
<LinearProgress determinate value={100 * draft.outputsConversionProgress} sx={{ mx: 1 }} />
@@ -268,11 +283,19 @@ export function LLMAttachmentMenu(props: {
<Typography color={isInputError ? 'danger' : 'warning'} level='title-sm'>
{isInputError ? 'Loading Issue' : 'Warning'}
</Typography>
{/* Only show 1 warning, excluding lower priorities */}
{isInputError ? <div>{draft.inputError}</div>
: isUnconvertible ? <div>Attachments of type {draft.input?.mimeType} are not supported yet. You can request this on GitHub.</div>
: isOutputMissing ? <div>File not supported. Please try another format.</div>
: !llmSupportsAllFragments ? <div>May not be compatible with the current model. Please try another format.</div>
: <>Unknown warning</>}
: draft.outputWarnings?.length ? '' /* printed below */
: <>Unknown warning</>}
{/* Explicit output warnings */}
{!!draft.outputWarnings?.length && draft.outputWarnings.map((w, widx) =>
<Box key={'ow-' + widx} sx={{ fontSize: 'sm', color: 'warning.softColor', py: 1 }}> {w}</Box>)
}
</Box>
</MenuItem>
</Box>
@@ -301,24 +324,24 @@ export function LLMAttachmentMenu(props: {
Details
</Typography>
) : (
<Box sx={{ my: 0.5 }}>
<Box sx={{ my: 1 }}>
{/* <- inputs */}
{showInputs && !!draftInput && (
<Typography level='body-sm' textColor='text.primary' startDecorator={<AttachmentIcon sx={indicatorSx} />}>
{draftInput.mimeType}{typeof draftInput.dataSize === 'number' ? ` · ${draftInput.dataSize.toLocaleString()} bytes` : ''}
<Typography level='body-sm' textColor='success.softColor'>
Input: {draftInput.mimeType}{typeof draftInput.dataSize === 'number' ? ` · ${humanReadableBytes(draftInput.dataSize)}` : ''}
</Typography>
)}
{showInputs && !!draftInput?.altMimeType && (
<Typography level='body-sm' sx={indicatorGapSx}>
{draftInput.altMimeType} · {draftInput.altData?.length.toLocaleString()}
<Typography level='body-sm' textColor='success.softColor'>
Input: {draftInput.altMimeType}{!draftInput.altData?.length ? '' : ` · ${humanReadableBytes(draftInput.altData.length)}`}
</Typography>
)}
{showInputs && !!draftInput?.urlImage && (
<Typography level='body-sm' sx={indicatorGapSx}>
{draftInput.urlImage.mimeType} · {draftInput.urlImage.width} x {draftInput.urlImage.height} · {draftInput.urlImage.imgDataUrl?.length.toLocaleString()}
{' · '}
<Chip component='span' size='sm' color='primary' variant='outlined' startDecorator={<VisibilityIcon />} onClick={(event) => {
<Typography level='body-sm' textColor='success.softColor' sx={{ display: 'flex', alignItems: 'center' }}>
Input: {draftInput.urlImage.mimeType} · {draftInput.urlImage.width}x{draftInput.urlImage.height}{!draftInput.urlImage.imgDataUrl?.length ? '' : ` · ${humanReadableBytes(draftInput.urlImage.imgDataUrl.length)}`}
&nbsp;
<Chip component='span' size='sm' color='success' variant='soft' startDecorator={<VisibilityIcon />} onClick={(event) => {
if (draftInput?.urlImage?.imgDataUrl) {
// Invoke the viewer but with a virtual 'temp' part description to see this preview image
handleViewImageRefPart(event, {
@@ -332,8 +355,8 @@ export function LLMAttachmentMenu(props: {
height: draftInput.urlImage.height || undefined,
});
}
}}>
view
}} sx={{ ml: 'auto' }}>
view input
</Chip>
</Typography>
)}
@@ -342,45 +365,79 @@ export function LLMAttachmentMenu(props: {
{/* Converters: {draft.converters.map(((converter, idx) => ` ${converter.id}${converter.isActive ? '*' : ''}`)).join(', ')}*/}
{/*</Typography>*/}
{/* Downward arrow */}
<Divider color='success'>
<KeyboardArrowDownIcon color='success' />
</Divider>
{/* -> Outputs */}
<Box sx={{ mt: 1 }}>
<Box>
{isOutputMissing ? (
<Typography level='body-sm' startDecorator={<ReadMoreIcon sx={indicatorSx} />}>...</Typography>
<Typography level='body-sm' color={isConverting ? 'primary' : 'danger'}>{isConverting ? '...' : '... nothing ...'}</Typography>
) : (
draft.outputFragments.map(({ part }, index) => {
if (isDocPart(part)) {
return (
<Typography key={index} level='body-sm' sx={{ color: 'text.primary' }} startDecorator={<ReadMoreIcon sx={indicatorSx} />}>
<span>{part.data.mimeType /* part.type: big-agi type, not source mime */} · {part.data.text.length.toLocaleString()} bytes ·&nbsp;</span>
<Chip component='span' size='sm' color='primary' variant='outlined' startDecorator={<VisibilityIcon />} onClick={(event) => handleViewDocPart(event, part)}>
view
</Chip>
<Chip component='span' size='sm' color='success' variant='outlined' startDecorator={<ContentCopyIcon />} onClick={(event) => handleCopyToClipboard(event, part.data.text)}>
copy
</Chip>
<Typography key={index} component='div' level='body-sm' textColor='primary.softColor' sx={{ display: 'flex', alignItems: 'center' }}>
<span>{part.data.mimeType /* part.type: big-agi type, not source mime */} · {humanReadableBytes(part.data.text.length)} &nbsp;</span>
{/*<Chip component='span' size='sm' color='primary' variant='outlined' startDecorator={<VisibilityIcon />} onClick={(event) => handleViewDocPart(event, part)} sx={{ ml: 'auto' }}>*/}
{/* view*/}
{/*</Chip>*/}
{/*<Chip component='span' size='sm' color='primary' variant='outlined' startDecorator={<ContentCopyIcon />} onClick={(event) => handleCopyToClipboard(event, part.data.text)}>*/}
{/* copy*/}
{/*</Chip>*/}
<ButtonGroup size='sm' color='primary' variant='outlined' sx={actionButtonsSx}>
<Button startDecorator={<VisibilityIcon sx={{ fontSize: 'md' }} />} onClick={(event) => handleViewDocPart(event, part)}>
view
</Button>
<Button onClick={(event) => handleCopyToClipboard(event, part.data.text)}/* endDecorator={<ContentCopyIcon />} */>
copy
</Button>
</ButtonGroup>
</Typography>
);
} else if (isZyncAssetImageReferencePartWithLegacyDBlob(part) || isImageRefPart(part)) {
// Unified Image Reference handling (both Zync Asset References with legacy fallback and legacy image_ref)
const legacyImageRefPart = isZyncAssetImageReferencePartWithLegacyDBlob(part) ? part._legacyImageRefPart! : part;
const { dataRef, width, height } = legacyImageRefPart;
const resolution = width && height ? `${width} x ${height}` : 'no resolution';
const resolution = width && height ? `${width}x${height}` : 'no resolution';
const mime = dataRef.reftype === 'dblob' ? dataRef.mimeType : 'unknown image';
return (
<Typography key={index} level='body-sm' sx={{ color: 'text.primary' }} startDecorator={<ReadMoreIcon sx={indicatorSx} />}>
<span>{mime /*.replace('image/', 'img: ')*/} · {resolution} · {dataRef.reftype === 'dblob' ? (dataRef.bytesSize?.toLocaleString() || 'no size') : '(remote)'} ·&nbsp;</span>
<Chip component='span' size={isOutputMultiple ? 'sm' : 'md'} color='primary' variant='outlined' startDecorator={<VisibilityIcon />}
onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}>
view
</Chip>
{isOutputMultiple && <Chip component='span' size={isOutputMultiple ? 'sm' : 'md'} color='danger' variant='outlined' startDecorator={<DeleteForeverIcon />} onClick={(event) => handleDeleteOutputFragment(event, index)}>
del
</Chip>}
<Typography key={index} component='div' level='body-sm' textColor='primary.softColor' sx={{ display: 'flex', alignItems: 'center' }}>
<span>{mime /*.replace('image/', 'img: ')*/} · {resolution} · {
dataRef.reftype !== 'dblob' ? '(remote)'
: !dataRef.bytesSize ? 'no size'
: humanReadableBytes(dataRef.bytesSize)} &nbsp;</span>
{/*<Chip component='span' size={isOutputMultiple ? 'sm' : 'md'} color='primary' variant='outlined' startDecorator={<VisibilityIcon />}*/}
{/* onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}>*/}
{/* view*/}
{/*</Chip>*/}
{/*{isOutputMultiple && <Chip component='span' size={isOutputMultiple ? 'sm' : 'md'} color='danger' variant='outlined' startDecorator={<DeleteForeverIcon />} onClick={(event) => handleDeleteOutputFragment(event, index)}>*/}
{/* del*/}
{/*</Chip>}*/}
<ButtonGroup size='sm' color='primary' variant='outlined' sx={actionButtonsSx}>
<Button
startDecorator={<VisibilityIcon sx={{ fontSize: 'md' }} />}
onClick={(event) => handleViewImageRefPart(event, legacyImageRefPart)}
>
view
</Button>
{isOutputMultiple && (
<Button
color='warning'
endDecorator={<DeleteOutlineIcon sx={{ fontSize: 'md' }} />}
onClick={(event) => handleDeleteOutputFragment(event, index)}
// sx={{ width: 48 }}
>
del
</Button>
)}
</ButtonGroup>
</Typography>
);
} else {
return (
<Typography key={index} level='body-sm' sx={{ color: 'text.primary' }} startDecorator={<ReadMoreIcon sx={indicatorSx} />}>
<Typography key={index} level='body-sm' textColor='primary.softColor'>
{(part as DMessageAttachmentFragment['part']).pt}: (other)
</Typography>
);
@@ -388,8 +445,8 @@ export function LLMAttachmentMenu(props: {
})
)}
{!!llmTokenCountApprox && (
<Typography level='body-xs' mt={0.5} sx={indicatorGapSx}>
~{llmTokenCountApprox.toLocaleString()} tokens
<Typography level='body-xs' mt={0.5} textColor='primary.softColor'>
&nbsp; ~ {llmTokenCountApprox.toLocaleString()} tokens
</Typography>
)}
</Box>
@@ -11,6 +11,7 @@ import { convert_Base64DataURL_To_Base64WithMimeType, convert_Base64WithMimeType
import { getDomainModelConfiguration } from '~/common/stores/llms/hooks/useModelDomain';
import { htmlTableToMarkdown } from '~/common/util/htmlTableToMarkdown';
import { humanReadableHyphenated } from '~/common/util/textUtils';
import { ocrImageWithProgress, ocrPdfPagesWithProgress } from '~/common/util/ocrUtils';
import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils';
import { createDMessageDataInlineText, createDocAttachmentFragment, DMessageAttachmentFragment, DMessageDataInline, DMessageDocPart, DVMimeType, isContentOrAttachmentFragment, isDocPart, specialContentPartToDocAttachmentFragment } from '~/common/stores/chat/chat.fragments';
@@ -28,7 +29,8 @@ const ENABLE_TEXT_AND_IMAGES = false; // [PROD] ?
const DOCPART_DEFAULT_VERSION = 1;
// PDF text extraction quality thresholds
const PDF_LOW_TEXT_THRESHOLD = 100; // chars per page - below this, consider the PDF as scanned/image-based
const IMAGE_LOW_TEXT_THRESHOLD = 80; // chars per image - below this, consider the image as low-text (photo-like) rather than document-like
const PDF_LOW_TEXT_THRESHOLD = 160; // chars per page - below this, consider the PDF as scanned/image-based
const PDF_FALLBACK_MAX_IMAGES = 32; // max pages to convert to images when auto-falling back (to respect LLM limits)
@@ -288,16 +290,18 @@ export function attachmentDefineConverters(source: AttachmentDraftSource, input:
converters.push({ id: 'image-original', name: 'Image (original quality)', disabled: !inputImageMimeSupported });
if (!inputImageMimeSupported)
converters.push({ id: 'image-to-default', name: `As Image (${PLATFORM_IMAGE_MIMETYPE})` });
converters.push({ id: 'image-caption', name: 'Caption (Text)', disabled: visionModelMissing });
converters.push({ id: 'image-caption', name: 'AI Caption (Text)', disabled: visionModelMissing });
converters.push({ id: 'unhandled', name: 'No Image' });
converters.push({ id: 'image-ocr', name: 'Add Text (OCR)', isCheckbox: true });
break;
// PDF
case mimeTypeIsPDF(input.mimeType):
converters.push({ id: 'pdf-text', name: 'PDF To Text', isActive: !autoAddImages || undefined });
converters.push({ id: 'pdf-images', name: 'PDF To Images' });
converters.push({ id: 'pdf-text-and-images', name: 'PDF Text & Images (best)', isActive: autoAddImages });
converters.push({ id: 'pdf-auto', name: 'Auto', isActive: !autoAddImages });
converters.push({ id: 'pdf-text', name: 'PDF Text' });
converters.push({ id: 'pdf-images-ocr', name: 'PDF -> OCR (for scans)' });
converters.push({ id: 'pdf-images', name: 'PDF -> Images' });
converters.push({ id: 'pdf-text-and-images', name: 'PDF -> Text + Images', isActive: autoAddImages });
break;
// DOCX
@@ -483,6 +487,8 @@ export async function attachmentPerformConversion(
edit(attachment.id, {
outputsConverting: true,
outputsConversionProgress: null,
outputWarnings: undefined,
outputsHeuristic: undefined,
});
// apply converter to the input
@@ -575,23 +581,14 @@ export async function attachmentPerformConversion(
case 'image-ocr':
if (!_expectBlob(input.data, 'Image OCR converter')) break;
try {
let lastProgress = -1;
const { recognize } = await import('tesseract.js');
const result = await recognize(input.data, undefined, {
errorHandler: e => console.error(e),
logger: (message) => {
if (message.status === 'recognizing text') {
if (message.progress > lastProgress + 0.01) {
lastProgress = message.progress;
edit(attachment.id, { outputsConversionProgress: lastProgress });
}
}
},
});
const imageText = result.data.text;
// Image -> OCR -> Inline text doc
const imageText = await ocrImageWithProgress(input.data, (progress) => edit(attachment.id, { outputsConversionProgress: progress }));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(imageText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'image' }));
// warn if very little text was extracted (likely a photo/diagram rather than text)
if (imageText.trim().length < IMAGE_LOW_TEXT_THRESHOLD)
edit(attachment.id, { outputWarnings: ['Very little text extracted - this image may not contain readable text.'] });
} catch (error) {
console.error(error);
console.error('[Image OCR Error]', error);
}
break;
@@ -620,65 +617,111 @@ export async function attachmentPerformConversion(
} catch (error: any) {
console.log('[DEV] Failed to caption image:', error);
const errorText = `[Captioning failed: ${error?.message || String(error)}]`;
edit(attachment.id, { outputWarnings: [errorText] });
newFragments.push(createDocAttachmentFragment(title, caption + ' (Error)', DVMimeType.TextPlain, createDMessageDataInlineText(errorText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'image-caption' }));
}
break;
// pdf to text (with auto-fallback to images for scanned/image-based PDFs)
case 'pdf-text':
if (!_expectBlob(input.data, 'PDF text converter')) break;
// pdf-auto: intelligent conversion with fallback chain (text → OCR → images)
case 'pdf-auto':
if (!_expectBlob(input.data, 'PDF auto converter')) break;
try {
// Convert Blob to ArrayBuffer for PDF.js
// Phase 1: Try text extraction (0-20% progress)
const pdfArrayBuffer = await input.data.arrayBuffer();
// Extract text with quality metadata
// [pdf-text] Extract text with quality metadata
const pdfTextResult = await pdfToText(pdfArrayBuffer, (progress: number) => {
// Reserve 0-30% for text extraction attempt, 30-100% for potential image fallback
edit(attachment.id, { outputsConversionProgress: progress * 0.3 });
// Reserve 0-20% for text extraction attempt, 20-100% for potential image fallback
edit(attachment.id, { outputsConversionProgress: progress * 0.2 });
});
// Check text density to detect scanned/image-based PDFs
if (pdfTextResult.avgCharsPerPage >= PDF_LOW_TEXT_THRESHOLD) {
// Good text extraction - use it
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
edit(attachment.id, {
outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-text', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page` },
});
} else {
// Low text density detected - auto-fallback to images
console.log(`[PDF] Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page across ${pdfTextResult.pageCount} pages), falling back to images`);
// Low text density - try OCR
// console.log(`[PDF Auto] Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page), trying OCR...`);
// Need fresh ArrayBuffer for image rendering (previous one may be consumed)
// [pdf-images] Phase 2: Render pages to images (20-40% progress)
const pdfArrayBufferForImages = await input.data.arrayBuffer();
const imageDataURLs = await pdfToImageDataURLs(pdfArrayBufferForImages, PLATFORM_IMAGE_MIMETYPE, PDF_IMAGE_QUALITY, PDF_IMAGE_PAGE_SCALE, (progress) => {
edit(attachment.id, { outputsConversionProgress: 0.3 + progress * 0.7 }); // 30-100%
edit(attachment.id, { outputsConversionProgress: 0.2 + progress * 0.2 });
});
// Limit pages to respect LLM image limits
const pagesToAttach = Math.min(imageDataURLs.length, PDF_FALLBACK_MAX_IMAGES);
for (let i = 0; i < pagesToAttach; i++) {
const pdfPageImage = imageDataURLs[i];
const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
if (pdfPageImageF)
newFragments.push(pdfPageImageF);
// Limit pages for OCR (performance)
const pagesToProcess = Math.min(imageDataURLs.length, PDF_FALLBACK_MAX_IMAGES);
const imagesToOcr = imageDataURLs.slice(0, pagesToProcess);
// Phase 3: Try OCR on rendered pages (40-90% progress)
try {
// [pdf-images-ocr] OCR the images
const ocrResult = await ocrPdfPagesWithProgress(imagesToOcr, (progress) => {
edit(attachment.id, { outputsConversionProgress: 0.4 + progress * 0.5 });
});
if (ocrResult.avgCharsPerPage >= PDF_LOW_TEXT_THRESHOLD) {
// OCR yielded good text - use it
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(ocrResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
const truncNote = pdfTextResult.pageCount > pagesToProcess ? ` (${pagesToProcess}/${pdfTextResult.pageCount} pages)` : '';
edit(attachment.id, {
outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images-ocr', explain: /*OCR extracted */`${ocrResult.avgCharsPerPage.toFixed(0)} chars/page${truncNote}` },
});
} else {
// OCR also yielded poor results - fall back to images
// console.log(`[PDF Auto] OCR also sparse (${ocrResult.avgCharsPerPage.toFixed(0)} chars/page), falling back to images`);
for (let i = 0; i < pagesToProcess; i++) {
const pdfPageImage = imageDataURLs[i];
const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
if (pdfPageImageF)
newFragments.push(pdfPageImageF);
}
const truncNote = pdfTextResult.pageCount > pagesToProcess ? ` (${pagesToProcess}/${pdfTextResult.pageCount} pages)` : '';
edit(attachment.id, {
outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images', explain: `not a text page${truncNote}` },
});
}
} catch (ocrError) {
// OCR failed - fall back to images
console.warn('[PDF Auto] OCR failed, falling back to images:', ocrError);
for (let i = 0; i < pagesToProcess; i++) {
const pdfPageImage = imageDataURLs[i];
const pdfPageImageF = await imageDataToImageAttachmentFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `${title} (pg. ${i + 1})`, caption, false, false);
if (pdfPageImageF)
newFragments.push(pdfPageImageF);
}
edit(attachment.id, {
outputsHeuristic: { isAuto: true, actualConverterId: 'pdf-images', explain: 'OCR failed, attached as images' },
});
}
// Set fallback info for UI display
const truncatedNote = pdfTextResult.pageCount > PDF_FALLBACK_MAX_IMAGES
? ` (first ${pagesToAttach} of ${pdfTextResult.pageCount} pages)`
: '';
edit(attachment.id, {
conversionFallback: {
from: 'pdf-text',
to: 'pdf-images',
reason: `Low text density (${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page) - converted to images${truncatedNote}`,
},
});
}
} catch (error) {
console.error('Error in PDF auto conversion:', error);
}
break;
// pdf-text: strict text extraction, no fallback (honors user choice)
case 'pdf-text':
if (!_expectBlob(input.data, 'PDF text converter')) break;
try {
const pdfTextResult = await pdfToText(await input.data.arrayBuffer(), progress => edit(attachment.id, { outputsConversionProgress: progress }));
// Always output text, even if sparse (user explicitly chose this)
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
edit(attachment.id, {
// warn if very little text was extracted (likely a scanned PDF)
outputWarnings: pdfTextResult.avgCharsPerPage >= 20 ? undefined : ['Very little text extracted - this PDF may be scanned. Try "Auto" or "OCR (for scans)" mode.'],
outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-text', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page` },
});
} catch (error) {
console.error('Error in PDF text extraction:', error);
}
break;
// pdf to images
// pdf-images: render all pages as images (honors user choice)
case 'pdf-images':
if (!_expectBlob(input.data, 'PDF images converter')) break;
// Convert Blob to ArrayBuffer for PDF.js
@@ -691,11 +734,39 @@ export async function attachmentPerformConversion(
if (pdfPageImageF)
newFragments.push(pdfPageImageF);
}
edit(attachment.id, {
outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-images', explain: `${imageDataURLs.length} pages` },
});
} catch (error) {
console.error('Error converting PDF to images:', error);
}
break;
// pdf-images-ocr: force OCR on all pages (for scanned documents)
case 'pdf-images-ocr':
if (!_expectBlob(input.data, 'PDF OCR converter')) break;
try {
// Render pages to images (0-40% progress)
const imageDataURLs = await pdfToImageDataURLs(await input.data.arrayBuffer(), PLATFORM_IMAGE_MIMETYPE, PDF_IMAGE_QUALITY, PDF_IMAGE_PAGE_SCALE, (progress) => {
edit(attachment.id, { outputsConversionProgress: progress * 0.4 });
});
// OCR all pages (40-100% progress)
const ocrResult = await ocrPdfPagesWithProgress(imageDataURLs, (progress) => {
edit(attachment.id, { outputsConversionProgress: 0.4 + progress * 0.6 });
});
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(ocrResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
edit(attachment.id, {
// warn if very little text was extracted (likely a scanned PDF)
outputWarnings: ocrResult.avgCharsPerPage >= 20 ? undefined : ['Very little text extracted via OCR - this PDF may contain mostly images/diagrams.'],
outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-images-ocr', explain: `${ocrResult.avgCharsPerPage.toFixed(0)} chars/page from ${ocrResult.pageCount} pages` },
});
} catch (error) {
console.error('Error in PDF OCR:', error);
}
break;
// pdf to text and images
case 'pdf-text-and-images':
if (!_expectBlob(input.data, 'PDF text and images converter')) break;
@@ -725,10 +796,13 @@ export async function attachmentPerformConversion(
const textFragment = createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfTextResult.text, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' });
newFragments.push(textFragment);
}
// Note: if text is sparse, images are still attached (user explicitly chose text+images), so we don't consider density here
// Note: if text is sparse, images are still attached (user explicitly chose text+images)
// Add the text fragment first, then the image fragments
newFragments.push(...imageFragments);
edit(attachment.id, {
outputsHeuristic: { isAuto: false, actualConverterId: 'pdf-text-and-images', explain: `${pdfTextResult.avgCharsPerPage.toFixed(0)} chars/page + ${imageFragments.length} images` },
});
} catch (error) {
console.error('Error converting PDF to text and images:', error);
}
@@ -24,11 +24,14 @@ export type AttachmentDraft = {
outputsConversionProgress: number | null;
outputFragments: DMessageAttachmentFragment[];
// Auto-fallback info: set when a converter auto-switches due to quality issues (e.g., PDF with low text density)
conversionFallback?: {
from: AttachmentDraftConverterType;
to: AttachmentDraftConverterType;
reason: string;
// Warnings for poor conversions (e.g. scanned PDF with text extraction rather than OCR)
outputWarnings?: string[];
// Tracks what method was actually used (especially for Auto mode)
outputsHeuristic?: {
isAuto: boolean;
actualConverterId: AttachmentDraftConverterType;
explain?: string; // e.g., "42 chars/page detected"
};
// metadata: {
@@ -144,7 +147,7 @@ export type AttachmentDraftConverter = {
export type AttachmentDraftConverterType =
| 'text' | 'rich-text' | 'rich-text-cleaner' | 'rich-text-table'
| 'image-original' | 'image-resized-high' | 'image-resized-low' | 'image-ocr' | 'image-caption' | 'image-to-default'
| 'pdf-text' | 'pdf-images' | 'pdf-text-and-images'
| 'pdf-auto' | 'pdf-text' | 'pdf-images' | 'pdf-images-ocr' | 'pdf-text-and-images'
| 'docx-to-html'
| 'url-page-text' | 'url-page-markdown' | 'url-page-html' | 'url-page-null' | 'url-page-image'
| 'youtube-transcript' | 'youtube-transcript-simple'