Attachments: remove DBlobs when setting outputFragments, until GC comes

This commit is contained in:
Enrico Ros
2024-06-03 09:21:58 -07:00
parent bfdb9c2624
commit 8db2a37a59
3 changed files with 105 additions and 40 deletions
@@ -1,4 +1,4 @@
import { addDBlobItem } from '~/modules/dblobs/dblobs.db';
import { addDBlobItem, deleteDBlobItem } from '~/modules/dblobs/dblobs.db';
import { createDBlobImageItem } from '~/modules/dblobs/dblobs.types';
import { convertBase64Image, getImageDimensions, LLMImageResizeMode, resizeBase64ImageIfNeeded } from '~/common/util/imageUtils';
@@ -89,4 +89,13 @@ export async function attachmentImageToFragmentViaDBlob(mimeType: string, inputD
console.error('imageAttachment: Error processing image:', error);
return null;
}
}
}
/**
 * Remove the DBlob item associated with the given DMessageAttachmentFragment.
 *
 * Only a fragment whose part is an 'image_ref' with a 'dblob' data reference is acted on;
 * for any other fragment this is a no-op.
 *
 * Deletion is best-effort: this helper is meant to be usable as fire-and-forget
 * (`void removeDBlobItemFromAttachmentFragment(...)`), so a failed delete is logged here
 * instead of escaping as an unhandled promise rejection.
 *
 * @param fragment - The attachment fragment whose backing DBlob item (if any) should be deleted.
 * @returns A promise that resolves once the delete attempt has settled (successfully or not).
 */
export async function removeDBlobItemFromAttachmentFragment(fragment: DMessageAttachmentFragment): Promise<void> {
  const { part } = fragment;

  // nothing to clean up unless the fragment references a DBlob-stored image
  if (part.pt !== 'image_ref' || part.dataRef.reftype !== 'dblob')
    return;

  try {
    await deleteDBlobItem(part.dataRef.dblobId);
  } catch (error) {
    // keep going: a failed cleanup must not break the caller's state update
    console.error('removeDBlobItemFromAttachmentFragment: Error deleting DBlob item:', error);
  }
}
@@ -7,6 +7,7 @@ import { pdfToImageDataURLs, pdfToText } from '~/common/util/pdfUtils';
import { createTextAttachmentFragment, DMessageAttachmentFragment } from '~/common/stores/chat/chat.message';
import type { AttachmentDraft, AttachmentDraftConverter, AttachmentDraftInput, AttachmentDraftSource } from './attachment.types';
import type { AttachmentsDraftsStore } from './store-attachment-drafts-slice';
import { attachmentImageToFragmentViaDBlob } from './attachment.dblobs';
@@ -113,7 +114,7 @@ export function attachmentCreate(source: AttachmentDraftSource): AttachmentDraft
* @param {Readonly<AttachmentDraftSource>} source - The source of the attachment.
* @param {(changes: Partial<Omit<AttachmentDraft, 'outputFragments'>>) => void} edit - A function to edit the AttachmentDraft object (all fields except outputFragments).
*/
export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftSource>, edit: (changes: Partial<AttachmentDraft>) => void) {
export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftSource>, edit: (changes: Partial<Omit<AttachmentDraft, 'outputFragments'>>) => void) {
edit({ inputLoading: true });
switch (source.media) {
@@ -218,7 +219,7 @@ export async function attachmentLoadInputAsync(source: Readonly<AttachmentDraftS
* @param {Readonly<AttachmentDraftInput>} input - The input of the AttachmentDraft object.
* @param {(changes: Partial<Omit<AttachmentDraft, 'outputFragments'>>) => void} edit - A function to edit the AttachmentDraft object (all fields except outputFragments).
*/
export function attachmentDefineConverters(sourceType: AttachmentDraftSource['media'], input: Readonly<AttachmentDraftInput>, edit: (changes: Partial<AttachmentDraft>) => void) {
export function attachmentDefineConverters(sourceType: AttachmentDraftSource['media'], input: Readonly<AttachmentDraftInput>, edit: (changes: Partial<Omit<AttachmentDraft, 'outputFragments'>>) => void) {
// return all the possible converters for the input
const converters: AttachmentDraftConverter[] = [];
@@ -281,16 +282,22 @@ export function attachmentDefineConverters(sourceType: AttachmentDraftSource['me
*
* @param {Readonly<AttachmentDraft>} attachment - The AttachmentDraft object to convert.
* @param {number | null} converterIdx - The index of the selected converter.
* @param {(changes: Partial<AttachmentDraft>) => void} edit - A function to edit the AttachmentDraft object.
* @param edit - A function to edit the AttachmentDraft object.
* @param replaceOutputFragments - A function to replace the output fragments of the AttachmentDraft object.
*/
export async function attachmentPerformConversion(attachment: Readonly<AttachmentDraft>, converterIdx: number | null, edit: (changes: Partial<AttachmentDraft>) => void) {
export async function attachmentPerformConversion(
attachment: Readonly<AttachmentDraft>,
converterIdx: number | null,
edit: AttachmentsDraftsStore['_editAttachment'],
replaceOutputFragments: AttachmentsDraftsStore['_replaceAttachmentOutputFragments'],
) {
// set converter index
converterIdx = (converterIdx !== null && converterIdx >= 0 && converterIdx < attachment.converters.length) ? converterIdx : null;
edit({
edit(attachment.id, {
converterIdx: converterIdx,
outputFragments: [],
});
replaceOutputFragments(attachment.id, []);
// get converter
const { source, ref, input } = attachment;
@@ -298,23 +305,23 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
if (!converter || !input)
return;
edit({
edit(attachment.id, {
outputsConverting: true,
});
// apply converter to the input
const outputFragments: DMessageAttachmentFragment[] = [];
const newFragments: DMessageAttachmentFragment[] = [];
switch (converter.id) {
// text as-is
case 'text':
outputFragments.push(createTextAttachmentFragment(inputDataToString(input.data), ref));
newFragments.push(createTextAttachmentFragment(inputDataToString(input.data), ref));
break;
// html as-is
case 'rich-text':
outputFragments.push(createTextAttachmentFragment(input.altData!, ref || '\n<!DOCTYPE html>'));
newFragments.push(createTextAttachmentFragment(input.altData!, ref || '\n<!DOCTYPE html>'));
break;
// html to markdown table
@@ -326,7 +333,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
// fallback to text/plain
mdTable = inputDataToString(input.data);
}
outputFragments.push(createTextAttachmentFragment(mdTable, ref));
newFragments.push(createTextAttachmentFragment(mdTable, ref));
break;
// image resized (default mime/quality, openai-high-res)
@@ -337,7 +344,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
}
const imageHighF = await attachmentImageToFragmentViaDBlob(input.mimeType, input.data, source, ref, ref, false, 'openai-high-res');
if (imageHighF)
outputFragments.push(imageHighF);
newFragments.push(imageHighF);
break;
// image resized (default mime/quality, openai-low-res)
@@ -348,7 +355,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
}
const imageLowF = await attachmentImageToFragmentViaDBlob(input.mimeType, input.data, source, ref, ref, false, 'openai-low-res');
if (imageLowF)
outputFragments.push(imageLowF);
newFragments.push(imageLowF);
break;
// image as-is
@@ -359,7 +366,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
}
const imageOrigF = await attachmentImageToFragmentViaDBlob(input.mimeType, input.data, source, ref, ref, false, false);
if (imageOrigF)
outputFragments.push(imageOrigF);
newFragments.push(imageOrigF);
break;
// image converted (potentially unsupported mime)
@@ -370,7 +377,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
}
const imageCastF = await attachmentImageToFragmentViaDBlob(input.mimeType, input.data, source, ref, ref, DEFAULT_ADRAFT_IMAGE_MIMETYPE, false);
if (imageCastF)
outputFragments.push(imageCastF);
newFragments.push(imageCastF);
break;
// image to text
@@ -390,7 +397,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
},
});
const imageText = result.data.text;
outputFragments.push(createTextAttachmentFragment(imageText, ref));
newFragments.push(createTextAttachmentFragment(imageText, ref));
} catch (error) {
console.error(error);
}
@@ -406,7 +413,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
// duplicate the ArrayBuffer to avoid mutation
const pdfData = new Uint8Array(input.data.slice(0));
const pdfText = await pdfToText(pdfData);
outputFragments.push(createTextAttachmentFragment(pdfText, ref));
newFragments.push(createTextAttachmentFragment(pdfText, ref));
break;
// pdf to images
@@ -420,9 +427,9 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
try {
const imageDataURLs = await pdfToImageDataURLs(pdfData2, DEFAULT_ADRAFT_IMAGE_MIMETYPE, PDF_IMAGE_QUALITY, PDF_IMAGE_PAGE_SCALE);
for (const pdfPageImage of imageDataURLs) {
const pdfPageImageF = await attachmentImageToFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `Page ${outputFragments.length + 1}`, ref, false, false);
const pdfPageImageF = await attachmentImageToFragmentViaDBlob(pdfPageImage.mimeType, pdfPageImage.base64Data, source, `Page ${newFragments.length + 1}`, ref, false, false);
if (pdfPageImageF)
outputFragments.push(pdfPageImageF);
newFragments.push(pdfPageImageF);
}
} catch (error) {
console.error('Error converting PDF to images:', error);
@@ -432,7 +439,7 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
// self: message
case 'ego-message-md':
outputFragments.push(createTextAttachmentFragment(inputDataToString(input.data), ref));
newFragments.push(createTextAttachmentFragment(inputDataToString(input.data), ref));
break;
case 'unhandled':
@@ -441,9 +448,9 @@ export async function attachmentPerformConversion(attachment: Readonly<Attachmen
}
// update
edit({
replaceOutputFragments(attachment.id, newFragments);
edit(attachment.id, {
outputsConverting: false,
outputFragments,
});
}
@@ -5,6 +5,7 @@ import type { DMessageAttachmentFragment } from '~/common/stores/chat/chat.messa
import type { AttachmentDraft, AttachmentDraftId, AttachmentDraftSource } from './attachment.types';
import { attachmentCreate, attachmentDefineConverters, attachmentLoadInputAsync, attachmentPerformConversion } from './attachment.pipeline';
import { removeDBlobItemFromAttachmentFragment } from './attachment.dblobs';
/// Attachment Draft Slice: per-conversation attachments store ///
@@ -34,7 +35,8 @@ export interface AttachmentsDraftsStore extends AttachmentDraftsState {
*/
takeTextFragments: (attachmentDraftId: AttachmentDraftId | null, removeFragments: boolean) => DMessageAttachmentFragment[];
_editAttachment: (attachmentDraftId: AttachmentDraftId, update: Partial<AttachmentDraft> | ((attachment: AttachmentDraft) => Partial<AttachmentDraft>)) => void;
_editAttachment: (attachmentDraftId: AttachmentDraftId, update: Partial<Omit<AttachmentDraft, 'outputFragments'>> | ((attachment: AttachmentDraft) => Partial<Omit<AttachmentDraft, 'outputFragments'>>)) => void;
_replaceAttachmentOutputFragments: (attachmentDraftId: AttachmentDraftId, outputFragments: DMessageAttachmentFragment[]) => void;
_getAttachment: (attachmentDraftId: AttachmentDraftId) => AttachmentDraft | undefined;
}
@@ -56,7 +58,7 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
}));
const attachmentDraftId = _attachmentDraft.id;
const editFn = (changes: Partial<AttachmentDraft>) => _editAttachment(attachmentDraftId, changes);
const editFn = (changes: Partial<Omit<AttachmentDraft, 'outputFragments'>>) => _editAttachment(attachmentDraftId, changes);
// 1.Resolve the Input
await attachmentLoadInputAsync(source, editFn);
@@ -75,13 +77,36 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
await setAttachmentDraftConverterIdxAndConvert(attachmentDraftId, firstEnabledIndex > -1 ? firstEnabledIndex : 0);
},
clearAttachmentsDrafts: () => _set({
attachmentDrafts: [],
}),
// Reset the list of attachment drafts to empty.
// The updater's `_state` argument is only referenced by the disabled cleanup code below.
clearAttachmentsDrafts: () =>
_set(_state => {
// NOTE: the DBlob cleanup below is commented out because right now the attachments are not moved
// to a different scope, and this function is actually used to clear the attachments when the message is sent
// TODO: do not use clearAttachments when the message is sent, figure out another way
// Remove the DBlob items associated with the removed fragments
// for (let draft of _state.attachmentDrafts) {
// for (let fragment of draft.outputFragments) {
// void removeDBlobItemFromAttachmentFragment(fragment);
// }
// }
return {
attachmentDrafts: [],
};
}),
removeAttachmentDraft: (attachmentDraftId: AttachmentDraftId) =>
_set(state => ({
attachmentDrafts: state.attachmentDrafts.filter(attachment => attachment.id !== attachmentDraftId),
attachmentDrafts: state.attachmentDrafts.filter(attachment => {
if (attachment.id !== attachmentDraftId)
return true;
// Remove the DBlob items associated with the removed fragments
for (let removedFragment of attachment.outputFragments) {
void removeDBlobItemFromAttachmentFragment(removedFragment);
}
// Remove the draft
return false;
}),
})),
moveAttachmentDraft: (attachmentDraftId: AttachmentDraftId, delta: 1 | -1) =>
@@ -101,14 +126,12 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
}),
setAttachmentDraftConverterIdxAndConvert: async (attachmentDraftId: AttachmentDraftId, converterIdx: number | null) => {
const { _getAttachment, _editAttachment } = _get();
const { _getAttachment, _editAttachment, _replaceAttachmentOutputFragments } = _get();
const attachmentDraft = _getAttachment(attachmentDraftId);
if (!attachmentDraft || attachmentDraft.converterIdx === converterIdx)
return;
const editFn = (changes: Partial<AttachmentDraft>) => _editAttachment(attachmentDraftId, changes);
await attachmentPerformConversion(attachmentDraft, converterIdx, editFn);
await attachmentPerformConversion(attachmentDraft, converterIdx, _editAttachment, _replaceAttachmentOutputFragments);
},
takeAllFragments: (removeFragments: boolean): DMessageAttachmentFragment[] => {
@@ -137,7 +160,7 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
continue;
}
// Extract
// Extract text fragments
const extractedTextFragments = draft.outputFragments.filter(fragment => fragment.part.pt === 'text');
textFragments.push(...extractedTextFragments);
@@ -147,12 +170,17 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
continue;
}
// Remove text fragments
const remainingFragments = draft.outputFragments.filter(fragment => fragment.part.pt !== 'text');
if (remainingFragments.length || draft.outputsConverting) {
// Removal: remove associated DBlob items
for (let removedFragment of extractedTextFragments) {
void removeDBlobItemFromAttachmentFragment(removedFragment);
}
// Removal: leave non-text fragments in the draft
const keptFragments = draft.outputFragments.filter(fragment => fragment.part.pt !== 'text');
if (keptFragments.length || draft.outputsConverting) {
keptDrafts.push({
...draft,
outputFragments: remainingFragments,
outputFragments: keptFragments,
});
}
}
@@ -166,7 +194,7 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
return textFragments;
},
_editAttachment: (attachmentDraftId: AttachmentDraftId, update: Partial<AttachmentDraft> | ((attachment: AttachmentDraft) => Partial<AttachmentDraft>)) =>
_editAttachment: (attachmentDraftId: AttachmentDraftId, update: Partial<Omit<AttachmentDraft, 'outputFragments'>> | ((attachment: AttachmentDraft) => Partial<Omit<AttachmentDraft, 'outputFragments'>>)) =>
_set(state => ({
attachmentDrafts: state.attachmentDrafts.map((attachmentDraft: AttachmentDraft): AttachmentDraft =>
attachmentDraft.id === attachmentDraftId
@@ -175,6 +203,27 @@ export const createAttachmentDraftsStoreSlice: StateCreator<AttachmentsDraftsSto
),
})),
/**
 * Swap in a new list of output fragments for a single attachment draft.
 *
 * Fragments currently held by the draft that are absent (by reference) from the incoming list
 * are each passed to removeDBlobItemFromAttachmentFragment, without awaiting the result.
 * Drafts with a different id are returned unchanged.
 */
_replaceAttachmentOutputFragments: (attachmentDraftId: AttachmentDraftId, outputFragments: DMessageAttachmentFragment[]) =>
  _set(state => {
    const nextDrafts = state.attachmentDrafts.map((draft: AttachmentDraft): AttachmentDraft => {
      // every other draft passes through untouched
      if (draft.id !== attachmentDraftId)
        return draft;

      // fire-and-forget cleanup for each fragment that is being dropped
      draft.outputFragments
        .filter(existing => !outputFragments.includes(existing))
        .forEach(dropped => void removeDBlobItemFromAttachmentFragment(dropped));

      // same draft, new fragments
      return { ...draft, outputFragments };
    });

    return { attachmentDrafts: nextDrafts };
  }),
// Look up a draft by id in the current store state; yields undefined when no draft matches.
_getAttachment: (attachmentDraftId: AttachmentDraftId) => {
  const { attachmentDrafts } = _get();
  return attachmentDrafts.find(draft => draft.id === attachmentDraftId);
},