Doc Part/Aix: support version

This commit is contained in:
Enrico Ros
2024-10-22 15:18:52 -07:00
parent b3cf624a39
commit 934f29dcfd
7 changed files with 46 additions and 32 deletions
@@ -156,14 +156,16 @@ export function DocAttachmentFragment(props: {
<Box sx={enhancedCodePanelTitleTooltipSx}>
<div>Attachment Title</div>
<div>{fragment.title}</div>
<div>Doc Title</div>
<div>{fragmentDocPart.l1Title}</div>
<div>Identifier</div>
<div>{fragmentDocPart.ref}</div>
<div>Render type</div>
<div>{fragmentDocPart.vdt}</div>
<div>Doc Title</div>
<div>{fragmentDocPart.l1Title}</div>
<div>Doc Version</div>
<div>{fragmentDocPart.version || '(none)'}</div>
<div>Text Mime type</div>
<div>{fragmentDocPart.data?.mimeType || '(unknown)'}</div>
<div>Render type</div>
<div>{fragmentDocPart.vdt}</div>
<div>Text Buffer Id</div>
<div>{fragmentId}</div>
</Box>
@@ -73,12 +73,14 @@ export function ViewDocPartModal(props: {
<div>{docPart.l1Title}</div>
<div>Identifier</div>
<div>{docPart.ref}</div>
<div>Mime type</div>
<div>Mime Type</div>
<div>{docPart.data?.mimeType || '(unknown)'}</div>
<div>Render type</div>
<div>Render Type</div>
<div>{docPart.vdt}</div>
<div>Rendering as</div>
<div>Rendering As</div>
<div>{renderAsMarkdown ? 'Markdown' : /*renderAsCode ? 'Code' :*/ 'Text'} (auto)</div>
<div>Doc Version</div>
<div>{docPart.version || '(none)'}</div>
</Box>
</Box>
@@ -25,6 +25,7 @@ export const DEFAULT_ADRAFT_IMAGE_QUALITY = 0.96;
// PDF-to-image tuning values; their use sites are outside this excerpt.
// NOTE(review): presumably a render scale factor and an encoder quality in [0, 1] - confirm at the call sites.
const PDF_IMAGE_PAGE_SCALE = 1.5;
const PDF_IMAGE_QUALITY = 0.5;
const ENABLE_TEXT_AND_IMAGES = false; // 2.0
// Version number passed as the 'version' argument of createDocAttachmentFragment by the converter cases in this module.
const DOCPART_DEFAULT_VERSION = 1;
// internal mimes, only used to route data within us (source -> input -> converters)
@@ -466,7 +467,7 @@ export async function attachmentPerformConversion(
case 'text':
const possibleLiveFileId = await attachmentGetLiveFileId(source);
const textualInlineData = createDMessageDataInlineText(_inputDataToString(input.data), input.mimeType);
newFragments.push(createDocAttachmentFragment(title, caption, _guessDocVDT(input.mimeType), textualInlineData, refString, docMeta, possibleLiveFileId));
newFragments.push(createDocAttachmentFragment(title, caption, _guessDocVDT(input.mimeType), textualInlineData, refString, DOCPART_DEFAULT_VERSION, docMeta, possibleLiveFileId));
break;
// html as-is
@@ -474,7 +475,7 @@ export async function attachmentPerformConversion(
// NOTE: before we had the following: createTextAttachmentFragment(ref || '\n<!DOCTYPE html>', input.altData!), which
// was used to wrap the HTML in a code block to facilitate AutoRenderBlocks's parser. Historic note, for future debugging.
const richTextData = createDMessageDataInlineText(input.altData || '', input.altMimeType);
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, richTextData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, richTextData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
// html cleaned
@@ -487,7 +488,7 @@ export async function attachmentPerformConversion(
// remove svg elements
.replace(/<svg[^>]*>.*?<\/svg>/g, '');
const cleanedHtmlData = createDMessageDataInlineText(cleanerHtml, 'text/html');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, cleanedHtmlData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, cleanedHtmlData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
// html to markdown table
@@ -500,7 +501,7 @@ export async function attachmentPerformConversion(
// fallback to text/plain
tableData = createDMessageDataInlineText(_inputDataToString(input.data), input.mimeType);
}
newFragments.push(createDocAttachmentFragment(title, caption, tableData.mimeType === 'text/markdown' ? DVMimeType.TextPlain : DVMimeType.TextPlain, tableData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, tableData.mimeType === 'text/markdown' ? DVMimeType.TextPlain : DVMimeType.TextPlain, tableData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
@@ -570,7 +571,7 @@ export async function attachmentPerformConversion(
},
});
const imageText = result.data.text;
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(imageText, 'text/plain'), refString, { ...docMeta, srcOcrFrom: 'image' }));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(imageText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'image' }));
} catch (error) {
console.error(error);
}
@@ -592,7 +593,7 @@ export async function attachmentPerformConversion(
// Warn the user if no text is extracted
// edit(attachment.id, { inputError: 'No text found in the PDF file.' });
} else
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfText, 'text/plain'), refString, { ...docMeta, srcOcrFrom: 'pdf' }));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' }));
break;
// pdf to images
@@ -642,7 +643,7 @@ export async function attachmentPerformConversion(
if (pdfText.trim().length < 2) {
// Do not warn the user, as hopefully the images are useful
} else {
const textFragment = createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfText, 'text/plain'), refString, { ...docMeta, srcOcrFrom: 'pdf' });
const textFragment = createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, createDMessageDataInlineText(pdfText, 'text/plain'), refString, DOCPART_DEFAULT_VERSION, { ...docMeta, srcOcrFrom: 'pdf' });
newFragments.push(textFragment);
}
@@ -663,7 +664,7 @@ export async function attachmentPerformConversion(
try {
const { convertDocxToHTML } = await import('./file-converters/DocxToMarkdown');
const { html } = await convertDocxToHTML(input.data);
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, createDMessageDataInlineText(html, 'text/html'), refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, createDMessageDataInlineText(html, 'text/html'), refString, DOCPART_DEFAULT_VERSION, docMeta));
} catch (error) {
console.error('Error in DOCX to Markdown conversion:', error);
}
@@ -677,7 +678,7 @@ export async function attachmentPerformConversion(
break;
}
const pageTextData = createDMessageDataInlineText((input.data as DraftWebInputData).pageText!, 'text/plain');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, pageTextData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, pageTextData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
// url page markdown
@@ -687,7 +688,7 @@ export async function attachmentPerformConversion(
break;
}
const pageMarkdownData = createDMessageDataInlineText((input.data as DraftWebInputData).pageMarkdown!, 'text/markdown');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, pageMarkdownData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, pageMarkdownData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
// url page html
@@ -697,7 +698,7 @@ export async function attachmentPerformConversion(
break;
}
const pageHtmlData = createDMessageDataInlineText((input.data as DraftWebInputData).pageCleanedHtml!, 'text/html');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, pageHtmlData, refString, docMeta));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.VndAgiCode, pageHtmlData, refString, DOCPART_DEFAULT_VERSION, docMeta));
break;
// url page null
@@ -739,7 +740,7 @@ export async function attachmentPerformConversion(
converter.id === 'youtube-transcript-simple' ? youtubeData.videoTranscript
: `**YouTube Title**: ${youtubeData.videoTitle}\n\n**YouTube Description**: ${youtubeData.videoDescription}\n\n**YouTube Transcript**:\n${youtubeData.videoTranscript}\n`;
const transcriptTextData = createDMessageDataInlineText(transcriptText, 'text/plain');
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, transcriptTextData, refString, docMeta, undefined));
newFragments.push(createDocAttachmentFragment(title, caption, DVMimeType.TextPlain, transcriptTextData, refString, DOCPART_DEFAULT_VERSION, docMeta, undefined));
break;
+10 -8
View File
@@ -95,7 +95,7 @@ export type DMessageErrorPart = { pt: 'error', error: string };
// Image part: points to image data through a DMessageDataRef; alt text and dimensions are optional.
export type DMessageImageRefPart = { pt: 'image_ref', dataRef: DMessageDataRef, altText?: string, width?: number, height?: number };
// Document part: inline data plus a reference string and a title.
// In the newer declaration 'version' is optional; readers in this module fall back to 1 when it is missing (see the `part.version || 1` uses).
export type DMessageDocPart = { pt: 'doc', vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, l1Title: string, meta?: DMessageDocMeta };
export type DMessageDocPart = { pt: 'doc', vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, l1Title: string, version?: number, meta?: DMessageDocMeta };
type DMessageDocMimeType =
// | 'application/vnd.agi.ego.fragments' // for attaching messages
// | 'application/vnd.agi.imageRef' // for image attachments with da - NO: makes no sense, as doc contains data
@@ -255,8 +255,8 @@ function _createContentFragment(part: DMessageContentFragment['part']): DMessage
/// Attachment Fragments - Creation & Duplication
// Creates an attachment fragment that carries a 'doc' part.
//  - l1Title: passed both as the first argument of _createAttachmentFragment and as the doc part's l1Title
//  - caption: forwarded to _createAttachmentFragment
//  - vdt, data, ref, meta: forwarded unchanged to _create_Doc_Part
//  - version (newer signature only): required, forwarded to _create_Doc_Part; it sits before 'meta', so positional callers must supply it
//  - liveFileId: optional, forwarded to _createAttachmentFragment
// Returns whatever _createAttachmentFragment produces (typed as DMessageAttachmentFragment).
export function createDocAttachmentFragment(l1Title: string, caption: string, vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, meta?: DMessageDocMeta, liveFileId?: LiveFileId): DMessageAttachmentFragment {
return _createAttachmentFragment(l1Title, caption, _create_Doc_Part(vdt, data, ref, l1Title, meta), liveFileId);
export function createDocAttachmentFragment(l1Title: string, caption: string, vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, version: number, meta?: DMessageDocMeta, liveFileId?: LiveFileId): DMessageAttachmentFragment {
return _createAttachmentFragment(l1Title, caption, _create_Doc_Part(vdt, data, ref, l1Title, version, meta), liveFileId);
}
export function createImageAttachmentFragment(title: string, caption: string, dataRef: DMessageDataRef, imgAltText?: string, width?: number, height?: number): DMessageAttachmentFragment {
@@ -266,11 +266,11 @@ export function createImageAttachmentFragment(title: string, caption: string, da
// Converts a content part into an attachment fragment.
//  - text part: becomes a doc attachment whose inline data is the part's text, tagged 'text/plain'
//  - image_ref part: becomes an image attachment with a duplicated data reference; vdt, ref and docMeta are not used on this path
//  - any other part: becomes a doc attachment titled 'Error' whose text names the unsupported part type
// No liveFileId is passed on any path.
export function specialContentPartToDocAttachmentFragment(title: string, caption: string, vdt: DMessageDocMimeType, contentPart: DMessageContentFragment['part'], ref: string, docMeta?: DMessageDocMeta): DMessageAttachmentFragment {
// switch(true) takes the first case whose type-guard call returns true
switch (true) {
case isTextPart(contentPart):
return createDocAttachmentFragment(title, caption, vdt, createDMessageDataInlineText(contentPart.text, 'text/plain'), ref, docMeta);
return createDocAttachmentFragment(title, caption, vdt, createDMessageDataInlineText(contentPart.text, 'text/plain'), ref, 2 /* As we attach our messages, we start from 2 */, docMeta);
case isImageRefPart(contentPart):
return createImageAttachmentFragment(title, caption, _duplicate_DataReference(contentPart.dataRef), contentPart.altText, contentPart.width, contentPart.height);
default:
// the caller's title and caption are replaced by fixed strings here; vdt, ref and docMeta are still passed through
return createDocAttachmentFragment('Error', 'Content to Attachment', vdt, createDMessageDataInlineText(`Conversion of '${contentPart.pt}' is not supported yet.`, 'text/plain'), ref, docMeta);
return createDocAttachmentFragment('Error', 'Content to Attachment', vdt, createDMessageDataInlineText(`Conversion of '${contentPart.pt}' is not supported yet.`, 'text/plain'), ref, 1 /* error has no version really */, docMeta);
}
}
@@ -331,8 +331,8 @@ function _create_Error_Part(error: string): DMessageErrorPart {
return { pt: 'error', error };
}
// Builds a DMessageDocPart object literal with pt fixed to 'doc'.
// All arguments are stored as given (no copying or validation); 'meta' may be undefined.
// The newer signature adds a required 'version' number, stored on the part as 'version'.
function _create_Doc_Part(vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, l1Title: string, meta?: DMessageDocMeta): DMessageDocPart {
return { pt: 'doc', vdt, data, ref, l1Title, meta };
function _create_Doc_Part(vdt: DMessageDocMimeType, data: DMessageDataInline, ref: string, l1Title: string, version: number, meta?: DMessageDocMeta): DMessageDocPart {
return { pt: 'doc', vdt, data, ref, l1Title, version, meta };
}
function _create_ImageRef_Part(dataRef: DMessageDataRef, altText?: string, width?: number, height?: number): DMessageImageRefPart {
@@ -366,7 +366,8 @@ function _create_Sentinel_Part(): _SentinelPart {
function _duplicate_Part<TPart extends (DMessageContentFragment | DMessageAttachmentFragment | DMessageVoidFragment)['part']>(part: TPart): TPart {
switch (part.pt) {
case 'doc':
return _create_Doc_Part(part.vdt, _duplicate_InlineData(part.data), part.ref, part.l1Title, part.meta ? { ...part.meta } : undefined) as TPart;
const newDocVersion = Number(part.version || 1); // we don't increase the version on duplication (not sure we should?)
return _create_Doc_Part(part.vdt, _duplicate_InlineData(part.data), part.ref, part.l1Title, newDocVersion, part.meta ? { ...part.meta } : undefined) as TPart;
case 'error':
return _create_Error_Part(part.error) as TPart;
@@ -534,6 +535,7 @@ export function updateFragmentWithEditedText(
part.vdt,
newDataInline,
part.ref,
Number(part.version || 1) + 1, // Increment version as this has been edited - note: we could have used ?? to be more correct, but || is safer
part.meta,
liveFileId,
);
@@ -135,6 +135,9 @@ export namespace AixWire_Parts {
// optional title of the document
l1Title: z.string().optional(),
// version of the document - optional because it's not guaranteed, but strongly suggested
version: z.number().optional(),
// inlined for now as it's only used here; in the TypeScript definition this is DMessageDataInline
data: z.object({
idt: z.literal('text'),
@@ -9,7 +9,7 @@ import { ContentScaling, themeScalingMap } from '~/common/app.theme';
export const enhancedCodePanelTitleTooltipSx: SxProps = {
p: 1,
display: 'grid',
gridTemplateColumns: 'auto 1fr',
gridTemplateColumns: 'auto 1fr auto 1fr',
alignItems: 'center',
columnGap: 2,
rowGap: 1,
@@ -89,16 +89,20 @@ export function EnhancedRenderCode(props: {
{/* This is what we have */}
<div><strong>Code Block</strong></div>
<div></div>
<div>{props.isPartial ? 'Partial ' : 'Complete'}</div>
<div></div>
<div>Title</div>
<div>{props.title || '(empty)'}</div>
<div>Version</div>
<div>{/* TODO props.version ||*/ '(none)'}</div>
{/*<div>Language</div>*/}
{/*<div>{props.language}</div>*/}
<div>Code Lines</div>
<div>{props.code.split('\n').length} lines</div>
<div>Code Length</div>
<div>{props.code.length} characters</div>
<div>semiStableId</div>
<div>{props.semiStableId || '(none)'}</div>
<div>Characters</div>
<div>{props.code.length}</div>
<div>tempId</div>
<div><small>{props.semiStableId || '(none)'}</small></div>
{/* This is what attachments carry */}
{/*<div>Attachment Title</div>*/}
{/*<div>{fragment.title}</div>*/}