mirror of
https://github.com/enricoros/big-AGI.git
synced 2026-05-11 14:10:15 -07:00
Improve block parsing: now supports inline images and multiple interleaved blocks
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
// NOTE(review): this looks like the pre-change side of the diff hunk — the `type Block` declaration that follows it adds a commented-out LatexBlock member; confirm which one is live.
type Block = CodeBlock | HtmlBlock | ImageBlock | TextBlock;
|
||||
/**
 * Element type of the array returned by `parseBlocks`.
 * The LatexBlock member is intentionally left commented out of the union.
 */
type Block =
  | CodeBlock
  | HtmlBlock
  | ImageBlock
  /* | LatexBlock */
  | TextBlock;
|
||||
/** A fenced (triple-backtick) code block extracted from the text. */
export type CodeBlock = {
  /** Discriminant tag. */
  type: 'code';
  /** Trimmed text captured right after the opening fence; empty string when there is none. */
  blockTitle: string;
  /** Trimmed text found between the opening fence and the closing fence (or end of input). */
  blockCode: string;
  /** True when a closing fence was matched, false when the input ended first. */
  complete: boolean;
};
|
||||
/** A whole-message HTML document, produced when the text starts with an HTML doctype or `<head>`. */
export type HtmlBlock = {
  /** Discriminant tag. */
  type: 'html';
  /** The full input text, unmodified. */
  html: string;
};
|
||||
/** An image referenced by URL (the parser currently only recognizes Prodia-hosted `.png` URLs). */
export type ImageBlock = {
  /** Discriminant tag. */
  type: 'image';
  /** Address of the image to display. */
  url: string;
};
|
||||
// export type LatexBlock = { type: 'latex'; latex: string; };
|
||||
/** A run of plain text or Markdown. */
export type TextBlock = {
  /** Discriminant tag. */
  type: 'text';
  /** The text (or Markdown source) of this run. */
  content: string;
};
|
||||
|
||||
|
||||
@@ -9,28 +10,61 @@ export function parseBlocks(forceText: boolean, text: string): Block[] {
|
||||
if (forceText)
|
||||
return [{ type: 'text', content: text }];
|
||||
|
||||
if (text.startsWith('https://images.prodia.xyz/') && text.endsWith('.png') && text.length > 60)
|
||||
return [{ type: 'image', url: text.trim() }];
|
||||
const regexPatterns = {
|
||||
codeBlock: /`{3,}([\w\\.+-_]+)?\n([\s\S]*?)(`{3,}\n?|$)/g,
|
||||
imageBlock: /(https:\/\/images\.prodia\.xyz\/.*?\.png)/g, // NOTE: only Prodia for now - but this shall be expanded to markdown images  or any png/jpeg
|
||||
latexBlock: /\$\$(.*?)\$\$\n?/g,
|
||||
};
|
||||
|
||||
// noinspection HtmlRequiredTitleElement
|
||||
if (text.startsWith('<!DOCTYPE html') || text.startsWith('<head>\n'))
|
||||
return [{ type: 'html', html: text }];
|
||||
|
||||
const codeBlockRegex = /`{3,}([\w\\.+-_]+)?\n([\s\S]*?)(`{3,}|$)/g;
|
||||
const blocks: Block[] = [];
|
||||
|
||||
let lastIndex = 0;
|
||||
let match;
|
||||
|
||||
while ((match = codeBlockRegex.exec(text)) !== null) {
|
||||
blocks.push({ type: 'text', content: text.slice(lastIndex, match.index) });
|
||||
const blockTitle: string = (match[1] || '').trim();
|
||||
const blockCode: string = match[2].trim();
|
||||
const blockEnd: string = match[3];
|
||||
blocks.push({ type: 'code', blockTitle, blockCode, complete: blockEnd.startsWith('```') });
|
||||
while (true) {
|
||||
|
||||
// find the first match (if any) trying all the regexes
|
||||
let match: RegExpExecArray | null = null;
|
||||
let matchType: keyof typeof regexPatterns | null = null;
|
||||
for (const type in regexPatterns) {
|
||||
const regex = regexPatterns[type as keyof typeof regexPatterns];
|
||||
regex.lastIndex = lastIndex;
|
||||
const currentMatch = regex.exec(text);
|
||||
if (currentMatch && (match === null || currentMatch.index < match.index)) {
|
||||
match = currentMatch;
|
||||
matchType = type as keyof typeof regexPatterns;
|
||||
}
|
||||
}
|
||||
if (match === null)
|
||||
break;
|
||||
|
||||
// anything leftover before the match is text
|
||||
if (match.index > lastIndex)
|
||||
blocks.push({ type: 'text', content: text.slice(lastIndex, match.index) });
|
||||
|
||||
// add the block
|
||||
switch (matchType) {
|
||||
case 'codeBlock':
|
||||
const blockTitle: string = (match[1] || '').trim();
|
||||
const blockCode: string = match[2].trim();
|
||||
const blockEnd: string = match[3];
|
||||
blocks.push({ type: 'code', blockTitle, blockCode, complete: blockEnd.startsWith('```') });
|
||||
break;
|
||||
|
||||
case 'imageBlock':
|
||||
const url: string = match[1];
|
||||
blocks.push({ type: 'image', url });
|
||||
break;
|
||||
|
||||
case 'latexBlock':
|
||||
const latex: string = match[1];
|
||||
blocks.push({ type: 'text', content: latex });
|
||||
break;
|
||||
}
|
||||
|
||||
// advance the pointer
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
|
||||
// remainder is text
|
||||
if (lastIndex < text.length)
|
||||
blocks.push({ type: 'text', content: text.slice(lastIndex) });
|
||||
|
||||
|
||||
Reference in New Issue
Block a user