fix(editor): enhance markdown latex preprocessing (#11597)

Close [BS-2440](https://linear.app/affine-design/issue/BS-2440/inline-latex-markdown-adapter-需要更精确的处理)
This commit is contained in:
donteatfriedrice
2025-04-10 12:15:24 +00:00
parent 25418b402a
commit d5aebc1421
2 changed files with 116 additions and 11 deletions

View File

@@ -3769,6 +3769,48 @@ bbb
}); });
expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot);
}); });
// Currency-style dollar amounts ("$9.15", "$ 8") must come out of the markdown
// import as ordinary paragraph text, unchanged.
test('escapes dollar signs followed by a digit or space and digit', async () => {
  const expectedText =
    'The price of the T-shirt is $9.15 and the price of the hat is $ 8';

  // The single paragraph the adapter is expected to produce.
  const expectedParagraph: BlockSnapshot = {
    type: 'block',
    id: 'matchesReplaceMap[1]',
    flavour: 'affine:paragraph',
    props: {
      type: 'text',
      text: {
        '$blocksuite:internal:text$': true,
        delta: [{ insert: expectedText }],
      },
    },
    children: [],
  };

  // The note block wrapping that paragraph.
  const expectedSnapshot: BlockSnapshot = {
    type: 'block',
    id: 'matchesReplaceMap[0]',
    flavour: 'affine:note',
    props: {
      xywh: '[0,0,800,95]',
      background: DefaultTheme.noteBackgrounColor,
      index: 'a0',
      hidden: false,
      displayMode: NoteDisplayMode.DocAndEdgeless,
    },
    children: [expectedParagraph],
  };

  const adapter = new MarkdownAdapter(createJob(), provider);
  const actualSnapshot = await adapter.toBlockSnapshot({
    file: `${expectedText}\n`,
  });

  // Generated ids are normalized before comparing against the fixture.
  expect(nanoidReplacement(actualSnapshot)).toEqual(expectedSnapshot);
});
}); });
test('reference', async () => { test('reference', async () => {

View File

@@ -3,21 +3,84 @@ import {
MarkdownPreprocessorExtension, MarkdownPreprocessorExtension,
} from '@blocksuite/affine-shared/adapters'; } from '@blocksuite/affine-shared/adapters';
/**
 * Rewrites bracket-style LaTeX delimiters into dollar delimiters:
 * `\[ … \]` becomes `$$ … $$` and `\( … \)` becomes `$ … $`.
 *
 * Fenced code blocks and inline code spans are matched by the first
 * alternative of the pattern and returned untouched, so delimiters inside
 * code are never rewritten.
 *
 * @param text - The markdown text to rewrite
 * @returns The text with bracket delimiters converted to dollar delimiters
 */
function escapeBrackets(text: string) {
  const delimiterRegex =
    /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  return text.replace(delimiterRegex, (whole, code, display, inline) => {
    // Code wins: hand it back exactly as it was matched.
    if (code) {
      return code;
    }
    if (display) {
      return `$$${display}$$`;
    }
    if (inline) {
      return `$${inline}$`;
    }
    // Every group was empty (e.g. `\(\)`): leave the match as it is.
    return whole;
  });
}
/**
 * Doubles the backslash of the mhchem commands `\ce` and `\pu` when they
 * directly follow an opening `$`: `$\ce{` becomes `$\\ce{` and `$\pu{`
 * becomes `$\\pu{`. All other text is left as it is.
 *
 * @param text - The markdown text to rewrite
 * @returns The text with the mhchem openers rewritten
 */
function escapeMhchem(text: string) {
  const withCeEscaped = text.split('$\\ce{').join('$\\\\ce{');
  const withPuEscaped = withCeEscaped.split('$\\pu{').join('$\\\\pu{');
  return withPuEscaped;
}
/**
 * Returns `base`, extended with underscores until the text `<<tag_` does not
 * occur anywhere in `content`, so placeholders built from the tag cannot be
 * confused with text the author actually wrote.
 */
function pickUnusedPlaceholderTag(content: string, base: string): string {
  let tag = base;
  while (content.includes(`<<${tag}_`)) {
    tag += '_';
  }
  return tag;
}

/**
 * Preprocess the content to protect code blocks and LaTeX expressions
 *
 * Steps:
 * 1. Fenced/inline code and existing LaTeX expressions (`$$…$$`, `\[…\]`,
 *    `\(…\)`) are swapped for placeholders so the next step cannot touch them.
 * 2. A `$` directly followed by a digit is treated as a currency sign and
 *    backslash-escaped, unless it is already escaped.
 * 3. The protected spans are restored, then `escapeBrackets` and
 *    `escapeMhchem` are applied.
 *
 * reference issue: https://github.com/remarkjs/react-markdown/issues/785
 * reference comment: https://github.com/remarkjs/react-markdown/issues/785#issuecomment-2307567823
 * @param content - The content to preprocess
 * @returns The preprocessed content
 */
function preprocessLatex(content: string): string {
  // Placeholder tags are chosen so they never collide with literal text such
  // as "<<LATEX_0>>" that may already be present in the input.
  const codeTag = pickUnusedPlaceholderTag(content, 'CODE_BLOCK');
  const latexTag = pickUnusedPlaceholderTag(content, 'LATEX');

  // Protect code blocks
  const codeBlocks: string[] = [];
  let preprocessedContent = content.replace(
    /(```[\s\S]*?```|`[^`\n]+`)/g,
    (code: string) => {
      codeBlocks.push(code);
      return `<<${codeTag}_${codeBlocks.length - 1}>>`;
    }
  );

  // Protect existing LaTeX expressions
  const latexExpressions: string[] = [];
  preprocessedContent = preprocessedContent.replace(
    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
    (expression: string) => {
      latexExpressions.push(expression);
      return `<<${latexTag}_${latexExpressions.length - 1}>>`;
    }
  );

  // Escape dollar signs that are likely currency indicators. The dollar is
  // only escaped when it is preceded by an even number of backslashes: an odd
  // count means it is already escaped, and adding another backslash would
  // turn `\$5` into an escaped backslash followed by a live `$`.
  preprocessedContent = preprocessedContent.replace(
    /(?<!\\)((?:\\\\)*)\$(?=\d)/g,
    (_match: string, backslashPairs: string) => backslashPairs + '\\$'
  );

  // Restore LaTeX expressions (before code blocks, because a protected LaTeX
  // expression may itself contain code placeholders).
  preprocessedContent = preprocessedContent.replace(
    new RegExp(`<<${latexTag}_(\\d+)>>`, 'g'),
    (placeholder: string, index: string) =>
      latexExpressions[Number(index)] ?? placeholder
  );

  // Restore code blocks
  preprocessedContent = preprocessedContent.replace(
    new RegExp(`<<${codeTag}_(\\d+)>>`, 'g'),
    (placeholder: string, index: string) =>
      codeBlocks[Number(index)] ?? placeholder
  );

  // Apply additional escaping functions
  preprocessedContent = escapeBrackets(preprocessedContent);
  preprocessedContent = escapeMhchem(preprocessedContent);
  return preprocessedContent;
}
/**
 * Markdown preprocessor registered under the name 'latex' for the 'block',
 * 'slice' and 'doc' levels. It hands the raw markdown to `preprocessLatex`
 * and returns the result.
 */
const latexPreprocessor: MarkdownAdapterPreprocessor = {
  name: 'latex',
  levels: ['block', 'slice', 'doc'],
  preprocess: content => {
    return preprocessLatex(content);
  },
};