fix(editor): handle polynomial regular expression used on uncontrolled data issue

This commit is contained in:
Chen
2025-05-23 17:56:31 +08:00
parent 9220b973c7
commit 10cf62c2b4
2 changed files with 115 additions and 11 deletions
@@ -33,6 +33,12 @@ describe('FootnoteReferenceMarkdownPreprocessorExtension', () => {
expect(preprocessFootnoteReference(content)).toBe(expected);
});
it('should not add space when there is not a footnote reference after URL', () => {
const content = 'https://example.com[^label';
const expected = 'https://example.com[^label';
expect(preprocessFootnoteReference(content)).toBe(expected);
});
it('should handle multiple footnote references with mixed URL and non-URL text', () => {
const content = 'https://example.com[^1]normal text[^2]http://test.com[^3]';
const expected =
@@ -17,6 +17,113 @@ function isUrl(str: string): boolean {
}
}
// Types for footnote parser tokens
type Token = {
type: 'TEXT' | 'FOOTNOTE_REF' | 'SPACE';
value: string;
};
class FootnoteReferenceParser {
private pos: number = 0;
private input: string = '';
private tokens: Token[] = [];
// Lexer: Convert input string into tokens
private tokenize(input: string): Token[] {
this.input = input;
this.pos = 0;
this.tokens = [];
while (this.pos < this.input.length) {
const char = this.input[this.pos];
// Handle spaces
if (char === ' ') {
this.tokens.push({ type: 'SPACE', value: ' ' });
this.pos++;
continue;
}
// Match footnote reference [^...]
if (char === '[' && this.input[this.pos + 1] === '^') {
let footnoteRef = '[^';
this.pos += 2;
while (this.pos < this.input.length && this.input[this.pos] !== ']') {
footnoteRef += this.input[this.pos];
this.pos++;
}
if (this.pos < this.input.length) {
footnoteRef += ']';
this.pos++;
// Only add as footnote reference if it's not followed by ':'
if (this.pos >= this.input.length || this.input[this.pos] !== ':') {
this.tokens.push({ type: 'FOOTNOTE_REF', value: footnoteRef });
} else {
this.tokens.push({ type: 'TEXT', value: footnoteRef });
}
} else {
// If we didn't find a closing bracket, treat it as regular text
this.tokens.push({ type: 'TEXT', value: footnoteRef });
}
continue;
}
// Handle regular text
let text = '';
while (
this.pos < this.input.length &&
this.input[this.pos] !== ' ' &&
!(this.input[this.pos] === '[' && this.input[this.pos + 1] === '^')
) {
text += this.input[this.pos];
this.pos++;
}
if (text) {
this.tokens.push({ type: 'TEXT', value: text });
}
}
return this.tokens;
}
// Process tokens to add spaces after URLs
private processTokens(tokens: Token[]): string {
let result = '';
let i = 0;
while (i < tokens.length) {
const token = tokens[i];
if (token.type === 'TEXT') {
result += token.value;
// Check if next token is a footnote reference
if (
i + 1 < tokens.length &&
tokens[i + 1].type === 'FOOTNOTE_REF' &&
isUrl(token.value)
) {
result += ' ';
}
} else {
result += token.value;
}
i++;
}
return result;
}
// Main processing function
public process(input: string): string {
const tokens = this.tokenize(input);
return this.processTokens(tokens);
}
}
/**
* Preprocess footnote references to avoid markdown link parsing
* Only add space when footnote reference follows a URL
@@ -29,17 +136,8 @@ function isUrl(str: string): boolean {
* ```
*/
export function preprocessFootnoteReference(content: string) {
return content.replace(
/([^\s]+?)(\[\^[^\]]+\])(?!:)/g,
(match, prevText, footnoteRef) => {
// Only add space if the previous text is a URL
if (isUrl(prevText)) {
return prevText + ' ' + footnoteRef;
}
// Otherwise return the original match
return match;
}
);
const parser = new FootnoteReferenceParser();
return parser.process(content);
}
const footnoteReferencePreprocessor: MarkdownAdapterPreprocessor = {