From 10cf62c2b41d975115f996e13ba4bfe005ffaa4b Mon Sep 17 00:00:00 2001
From: Chen <99816898+donteatfriedrice@users.noreply.github.com>
Date: Fri, 23 May 2025 17:56:31 +0800
Subject: [PATCH] fix(editor): handle polynomial regular expression used on uncontrolled data issue

---
Review amendments:
* Text runs now end at any whitespace character (the replaced regex used
  [^\s]), so a URL at the start of a line is passed to isUrl on its own.
* `[^]` with an empty label is no longer treated as a footnote reference
  (the replaced regex required at least one label character).
* Hunk sizes and the diffstat were updated by hand; the blob ids on the
  `index` lines were not recomputed.

 .../adapters/preprocessor.unit.spec.ts        |  18 +++
 .../src/adapters/markdown/preprocessor.ts     | 137 ++++++++++++++++--
 2 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/blocksuite/affine/inlines/footnote/src/__tests__/adapters/preprocessor.unit.spec.ts b/blocksuite/affine/inlines/footnote/src/__tests__/adapters/preprocessor.unit.spec.ts
index 5ef0f62cea..fa909f37a7 100644
--- a/blocksuite/affine/inlines/footnote/src/__tests__/adapters/preprocessor.unit.spec.ts
+++ b/blocksuite/affine/inlines/footnote/src/__tests__/adapters/preprocessor.unit.spec.ts
@@ -33,6 +33,24 @@ describe('FootnoteReferenceMarkdownPreprocessorExtension', () => {
     expect(preprocessFootnoteReference(content)).toBe(expected);
   });
 
+  it('should not add space when there is not a footnote reference after URL', () => {
+    const content = 'https://example.com[^label';
+    const expected = 'https://example.com[^label';
+    expect(preprocessFootnoteReference(content)).toBe(expected);
+  });
+
+  it('should add space when the URL starts a new line', () => {
+    const content = 'Read this!\nhttps://example.com[^1]';
+    const expected = 'Read this!\nhttps://example.com [^1]';
+    expect(preprocessFootnoteReference(content)).toBe(expected);
+  });
+
+  it('should not add space before an empty footnote label', () => {
+    const content = 'https://example.com[^]';
+    const expected = 'https://example.com[^]';
+    expect(preprocessFootnoteReference(content)).toBe(expected);
+  });
+
   it('should handle multiple footnote references with mixed URL and non-URL text', () => {
     const content = 'https://example.com[^1]normal text[^2]http://test.com[^3]';
     const expected =
diff --git a/blocksuite/affine/inlines/footnote/src/adapters/markdown/preprocessor.ts b/blocksuite/affine/inlines/footnote/src/adapters/markdown/preprocessor.ts
index 4db7e06a35..676963860a 100644
--- a/blocksuite/affine/inlines/footnote/src/adapters/markdown/preprocessor.ts
+++ b/blocksuite/affine/inlines/footnote/src/adapters/markdown/preprocessor.ts
@@ -17,6 +17,130 @@ function isUrl(str: string): boolean {
   }
 }
 
+// Types for footnote parser tokens
+type Token = {
+  type: 'TEXT' | 'FOOTNOTE_REF' | 'SPACE';
+  value: string;
+};
+
+// Single-character whitespace test. It mirrors the `\s` class of the regular
+// expression this parser replaces, so newlines and tabs end a text run too.
+const WHITESPACE_PATTERN = /\s/;
+
+function isWhitespace(char: string): boolean {
+  return WHITESPACE_PATTERN.test(char);
+}
+
+/**
+ * Hand-written scanner that replaces a backtracking regular expression.
+ * It splits the input into whitespace, `[^label]` references and text runs,
+ * then re-joins them, adding a space between a URL and a reference.
+ */
+class FootnoteReferenceParser {
+  private pos: number = 0;
+  private input: string = '';
+  private tokens: Token[] = [];
+
+  // Lexer: Convert input string into tokens
+  private tokenize(input: string): Token[] {
+    this.input = input;
+    this.pos = 0;
+    this.tokens = [];
+
+    while (this.pos < this.input.length) {
+      const char = this.input.charAt(this.pos);
+
+      // Handle whitespace; keep the original character so that the output
+      // reproduces newlines and tabs unchanged
+      if (isWhitespace(char)) {
+        this.tokens.push({ type: 'SPACE', value: char });
+        this.pos++;
+        continue;
+      }
+
+      // Match footnote reference [^...]
+      if (char === '[' && this.input[this.pos + 1] === '^') {
+        let footnoteRef = '[^';
+        this.pos += 2;
+
+        while (this.pos < this.input.length && this.input[this.pos] !== ']') {
+          footnoteRef += this.input[this.pos];
+          this.pos++;
+        }
+
+        if (this.pos < this.input.length) {
+          footnoteRef += ']';
+          this.pos++;
+          // Only add as footnote reference if the label is not empty and
+          // it's not followed by ':'
+          const hasLabel = footnoteRef.length > '[^]'.length;
+          const followedByColon = this.input[this.pos] === ':';
+          if (hasLabel && !followedByColon) {
+            this.tokens.push({ type: 'FOOTNOTE_REF', value: footnoteRef });
+          } else {
+            this.tokens.push({ type: 'TEXT', value: footnoteRef });
+          }
+        } else {
+          // If we didn't find a closing bracket, treat it as regular text
+          this.tokens.push({ type: 'TEXT', value: footnoteRef });
+        }
+        continue;
+      }
+
+      // Handle regular text
+      let text = '';
+      while (
+        this.pos < this.input.length &&
+        !isWhitespace(this.input.charAt(this.pos)) &&
+        !(this.input[this.pos] === '[' && this.input[this.pos + 1] === '^')
+      ) {
+        text += this.input[this.pos];
+        this.pos++;
+      }
+      if (text) {
+        this.tokens.push({ type: 'TEXT', value: text });
+      }
+    }
+
+    return this.tokens;
+  }
+
+  // Process tokens to add spaces after URLs
+  private processTokens(tokens: Token[]): string {
+    let result = '';
+    let i = 0;
+
+    while (i < tokens.length) {
+      const token = tokens[i];
+
+      if (token.type === 'TEXT') {
+        result += token.value;
+
+        // Check if next token is a footnote reference
+        if (
+          i + 1 < tokens.length &&
+          tokens[i + 1].type === 'FOOTNOTE_REF' &&
+          isUrl(token.value)
+        ) {
+          result += ' ';
+        }
+      } else {
+        result += token.value;
+      }
+
+      i++;
+    }
+
+    return result;
+  }
+
+  // Main processing function
+  public process(input: string): string {
+    const tokens = this.tokenize(input);
+    return this.processTokens(tokens);
+  }
+}
+
 /**
  * Preprocess footnote references to avoid markdown link parsing
  * Only add space when footnote reference follows a URL
@@ -29,17 +153,8 @@ function isUrl(str: string): boolean {
  * ```
  */
 export function preprocessFootnoteReference(content: string) {
-  return content.replace(
-    /([^\s]+?)(\[\^[^\]]+\])(?!:)/g,
-    (match, prevText, footnoteRef) => {
-      // Only add space if the previous text is a URL
-      if (isUrl(prevText)) {
-        return prevText + ' ' + footnoteRef;
-      }
-      // Otherwise return the original match
-      return match;
-    }
-  );
+  const parser = new FootnoteReferenceParser();
+  return parser.process(content);
 }
 
 const footnoteReferencePreprocessor: MarkdownAdapterPreprocessor = {