From 568a390b759a04b4865989b11cd9113fc31bf1f7 Mon Sep 17 00:00:00 2001 From: donteatfriedrice Date: Mon, 7 Apr 2025 02:18:04 +0000 Subject: [PATCH] feat(editor): support markdown adapter preprocessed with latex delimiters (#11431) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To close [BS-2870](https://linear.app/affine-design/issue/BS-2870/支持识别-和-[-包裹内容为公式) ## Add Markdown Preprocessor Extension and Enhanced LaTeX Support ### Markdown Preprocessor Extension This PR introduces a new preprocessor extension for Markdown adapters that allows content to be preprocessed before conversion. It: (1) adds a new `PreprocessorManager` for handling text transformations; (2) introduces an extensible preprocessor interface that supports different processing levels (block/slice/doc); and (3) integrates the preprocessor extension into the existing Markdown adapter workflow. ### LaTeX Support Enhancement Extends LaTeX support to handle both the traditional and the alternative syntax. It: (1) adds support for the backslash LaTeX syntax, namely block math `\[...\]` alongside the existing `$$...$$`, and inline math `\(...\)` alongside the existing `$...$`; (2) implements a LaTeX preprocessor that standardizes the syntax before conversion; and (3) updates the tests to cover both syntax variants. --- .../__tests__/adapters/markdown.unit.spec.ts | 155 +++++++++--------- .../affine/all/src/adapters/extension.ts | 2 + blocksuite/affine/all/src/adapters/index.ts | 1 + .../all/src/adapters/markdown/preprocessor.ts | 7 + .../block-code/src/adapters/extension.ts | 6 +- .../blocks/block-code/src/adapters/index.ts | 2 +- .../block-code/src/adapters/markdown/index.ts | 12 ++ .../src/adapters/{ => markdown}/markdown.ts | 0 .../src/adapters/markdown/preprocessor.ts | 76 +++++++++ .../block-latex/src/adapters/extension.ts | 6 +- .../blocks/block-latex/src/adapters/index.ts | 2 +- .../src/adapters/markdown/index.ts | 12 ++ .../src/adapters/{ => markdown}/markdown.ts | 0 .../src/adapters/markdown/preprocessor.ts | 25 +++ 
.../src/adapters/html/delta-converter.ts | 2 +- .../affine/shared/src/adapters/index.ts | 3 + .../src/adapters/markdown/delta-converter.ts | 2 +- .../shared/src/adapters/markdown/index.ts | 1 + .../shared/src/adapters/markdown/markdown.ts | 78 ++------- .../src/adapters/markdown/preprocessor.ts | 40 +++++ .../adapters/notion-html/delta-converter.ts | 2 +- .../adapters/plain-text/delta-converter.ts | 4 +- .../shared/src/adapters/types/adapter.ts | 61 +------ .../src/adapters/types/delta-converter.ts | 61 +++++++ .../affine/shared/src/adapters/types/index.ts | 2 + .../shared/src/adapters/types/preprocessor.ts | 88 ++++++++++ .../nbstore/src/sync/indexer/crawler.ts | 12 +- packages/frontend/apps/android/src/app.tsx | 12 +- packages/frontend/apps/ios/src/app.tsx | 12 +- .../blocksuite/ai/components/text-renderer.ts | 12 +- .../__tests__/mindmap-preview.unit.spec.ts | 12 +- 31 files changed, 450 insertions(+), 260 deletions(-) create mode 100644 blocksuite/affine/all/src/adapters/markdown/preprocessor.ts create mode 100644 blocksuite/affine/blocks/block-code/src/adapters/markdown/index.ts rename blocksuite/affine/blocks/block-code/src/adapters/{ => markdown}/markdown.ts (100%) create mode 100644 blocksuite/affine/blocks/block-code/src/adapters/markdown/preprocessor.ts create mode 100644 blocksuite/affine/blocks/block-latex/src/adapters/markdown/index.ts rename blocksuite/affine/blocks/block-latex/src/adapters/{ => markdown}/markdown.ts (100%) create mode 100644 blocksuite/affine/blocks/block-latex/src/adapters/markdown/preprocessor.ts create mode 100644 blocksuite/affine/shared/src/adapters/markdown/preprocessor.ts create mode 100644 blocksuite/affine/shared/src/adapters/types/delta-converter.ts create mode 100644 blocksuite/affine/shared/src/adapters/types/preprocessor.ts diff --git a/blocksuite/affine/all/src/__tests__/adapters/markdown.unit.spec.ts b/blocksuite/affine/all/src/__tests__/adapters/markdown.unit.spec.ts index 0ba25b14cd..1b731d1ad8 100644 --- 
a/blocksuite/affine/all/src/__tests__/adapters/markdown.unit.spec.ts +++ b/blocksuite/affine/all/src/__tests__/adapters/markdown.unit.spec.ts @@ -3680,88 +3680,95 @@ bbb expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); }); - test('inline latex', async () => { - const markdown = 'inline $E=mc^2$ latex\n'; - const blockSnapshot: BlockSnapshot = { - type: 'block', - id: 'matchesReplaceMap[0]', - flavour: 'affine:note', - props: { - xywh: '[0,0,800,95]', - background: DefaultTheme.noteBackgrounColor, - index: 'a0', - hidden: false, - displayMode: NoteDisplayMode.DocAndEdgeless, - }, - children: [ - { - type: 'block', - id: 'matchesReplaceMap[1]', - flavour: 'affine:paragraph', - props: { - type: 'text', - text: { - '$blocksuite:internal:text$': true, - delta: [ - { - insert: 'inline ', - }, - { - insert: ' ', - attributes: { - latex: 'E=mc^2', - }, - }, - { - insert: ' latex', - }, - ], - }, - }, - children: [], + describe('inline latex', () => { + test.each([ + ['dollar sign syntax', 'inline $E=mc^2$ latex\n'], + ['backslash syntax', 'inline \\(E=mc^2\\) latex\n'], + ])('should convert %s correctly', async (_, markdown) => { + const blockSnapshot: BlockSnapshot = { + type: 'block', + id: 'matchesReplaceMap[0]', + flavour: 'affine:note', + props: { + xywh: '[0,0,800,95]', + background: DefaultTheme.noteBackgrounColor, + index: 'a0', + hidden: false, + displayMode: NoteDisplayMode.DocAndEdgeless, }, - ], - }; + children: [ + { + type: 'block', + id: 'matchesReplaceMap[1]', + flavour: 'affine:paragraph', + props: { + type: 'text', + text: { + '$blocksuite:internal:text$': true, + delta: [ + { + insert: 'inline ', + }, + { + insert: ' ', + attributes: { + latex: 'E=mc^2', + }, + }, + { + insert: ' latex', + }, + ], + }, + }, + children: [], + }, + ], + }; - const mdAdapter = new MarkdownAdapter(createJob(), provider); - const rawBlockSnapshot = await mdAdapter.toBlockSnapshot({ - file: markdown, + const mdAdapter = new MarkdownAdapter(createJob(), 
provider); + const rawBlockSnapshot = await mdAdapter.toBlockSnapshot({ + file: markdown, + }); + expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); }); - expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); }); - test('latex block', async () => { - const markdown = '$$\nE=mc^2\n$$\n'; - - const blockSnapshot: BlockSnapshot = { - type: 'block', - id: 'matchesReplaceMap[0]', - flavour: 'affine:note', - props: { - xywh: '[0,0,800,95]', - background: DefaultTheme.noteBackgrounColor, - index: 'a0', - hidden: false, - displayMode: NoteDisplayMode.DocAndEdgeless, - }, - children: [ - { - type: 'block', - id: 'matchesReplaceMap[1]', - flavour: 'affine:latex', - props: { - latex: 'E=mc^2', - }, - children: [], + describe('latex block', () => { + test.each([ + ['dollar sign syntax', '$$\nE=mc^2\n$$\n'], + ['backslash syntax', '\\[\nE=mc^2\n\\]\n'], + ])('should convert %s correctly', async (_, markdown) => { + const blockSnapshot: BlockSnapshot = { + type: 'block', + id: 'matchesReplaceMap[0]', + flavour: 'affine:note', + props: { + xywh: '[0,0,800,95]', + background: DefaultTheme.noteBackgrounColor, + index: 'a0', + hidden: false, + displayMode: NoteDisplayMode.DocAndEdgeless, }, - ], - }; + children: [ + { + type: 'block', + id: 'matchesReplaceMap[1]', + flavour: 'affine:latex', + props: { + latex: 'E=mc^2', + }, + children: [], + }, + ], + }; - const mdAdapter = new MarkdownAdapter(createJob(), provider); - const rawBlockSnapshot = await mdAdapter.toBlockSnapshot({ - file: markdown, + const mdAdapter = new MarkdownAdapter(createJob(), provider); + const rawBlockSnapshot = await mdAdapter.toBlockSnapshot({ + file: markdown, + }); + expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); }); - expect(nanoidReplacement(rawBlockSnapshot)).toEqual(blockSnapshot); }); test('reference', async () => { diff --git a/blocksuite/affine/all/src/adapters/extension.ts b/blocksuite/affine/all/src/adapters/extension.ts index 
051639c949..c452d208bb 100644 --- a/blocksuite/affine/all/src/adapters/extension.ts +++ b/blocksuite/affine/all/src/adapters/extension.ts @@ -20,6 +20,7 @@ import type { ExtensionType } from '@blocksuite/store'; import { defaultBlockHtmlAdapterMatchers } from './html/block-matcher'; import { defaultBlockMarkdownAdapterMatchers } from './markdown/block-matcher'; +import { defaultMarkdownPreprocessors } from './markdown/preprocessor'; import { defaultBlockNotionHtmlAdapterMatchers } from './notion-html/block-matcher'; import { defaultBlockPlainTextAdapterMatchers } from './plain-text/block-matcher'; @@ -44,6 +45,7 @@ export const MarkdownAdapterExtension: ExtensionType[] = [ ...MarkdownInlineToDeltaAdapterExtensions, ...defaultBlockMarkdownAdapterMatchers, ...InlineDeltaToMarkdownAdapterExtensions, + ...defaultMarkdownPreprocessors, ]; export const NotionHtmlAdapterExtension: ExtensionType[] = [ diff --git a/blocksuite/affine/all/src/adapters/index.ts b/blocksuite/affine/all/src/adapters/index.ts index 355576fd15..5b0377572c 100644 --- a/blocksuite/affine/all/src/adapters/index.ts +++ b/blocksuite/affine/all/src/adapters/index.ts @@ -1,5 +1,6 @@ export * from './extension.js'; export * from './html/block-matcher.js'; export * from './markdown/block-matcher.js'; +export * from './markdown/preprocessor.js'; export * from './notion-html/block-matcher.js'; export * from './plain-text/block-matcher.js'; diff --git a/blocksuite/affine/all/src/adapters/markdown/preprocessor.ts b/blocksuite/affine/all/src/adapters/markdown/preprocessor.ts new file mode 100644 index 0000000000..48a630f5a6 --- /dev/null +++ b/blocksuite/affine/all/src/adapters/markdown/preprocessor.ts @@ -0,0 +1,7 @@ +import { CodeMarkdownPreprocessorExtension } from '@blocksuite/affine-block-code'; +import { LatexMarkdownPreprocessorExtension } from '@blocksuite/affine-block-latex'; + +export const defaultMarkdownPreprocessors = [ + LatexMarkdownPreprocessorExtension, + CodeMarkdownPreprocessorExtension, +]; 
diff --git a/blocksuite/affine/blocks/block-code/src/adapters/extension.ts b/blocksuite/affine/blocks/block-code/src/adapters/extension.ts index 9006ac28f0..b48c0475f5 100644 --- a/blocksuite/affine/blocks/block-code/src/adapters/extension.ts +++ b/blocksuite/affine/blocks/block-code/src/adapters/extension.ts @@ -1,13 +1,13 @@ import type { ExtensionType } from '@blocksuite/store'; import { CodeBlockHtmlAdapterExtension } from './html.js'; -import { CodeBlockMarkdownAdapterExtension } from './markdown.js'; +import { CodeBlockMarkdownAdapterExtensions } from './markdown/index.js'; import { CodeBlockNotionHtmlAdapterExtension } from './notion-html.js'; import { CodeBlockPlainTextAdapterExtension } from './plain-text.js'; export const CodeBlockAdapterExtensions: ExtensionType[] = [ CodeBlockHtmlAdapterExtension, - CodeBlockMarkdownAdapterExtension, + CodeBlockMarkdownAdapterExtensions, CodeBlockPlainTextAdapterExtension, CodeBlockNotionHtmlAdapterExtension, -]; +].flat(); diff --git a/blocksuite/affine/blocks/block-code/src/adapters/index.ts b/blocksuite/affine/blocks/block-code/src/adapters/index.ts index b4dd5a6d2a..d91936594e 100644 --- a/blocksuite/affine/blocks/block-code/src/adapters/index.ts +++ b/blocksuite/affine/blocks/block-code/src/adapters/index.ts @@ -1,4 +1,4 @@ export * from './html.js'; -export * from './markdown.js'; +export * from './markdown/index.js'; export * from './notion-html.js'; export * from './plain-text.js'; diff --git a/blocksuite/affine/blocks/block-code/src/adapters/markdown/index.ts b/blocksuite/affine/blocks/block-code/src/adapters/markdown/index.ts new file mode 100644 index 0000000000..b6570aa0f7 --- /dev/null +++ b/blocksuite/affine/blocks/block-code/src/adapters/markdown/index.ts @@ -0,0 +1,12 @@ +import type { ExtensionType } from '@blocksuite/store'; + +import { CodeBlockMarkdownAdapterExtension } from './markdown.js'; +import { CodeMarkdownPreprocessorExtension } from './preprocessor.js'; + +export * from './markdown.js'; 
+export * from './preprocessor.js'; + +export const CodeBlockMarkdownAdapterExtensions: ExtensionType[] = [ + CodeMarkdownPreprocessorExtension, + CodeBlockMarkdownAdapterExtension, +]; diff --git a/blocksuite/affine/blocks/block-code/src/adapters/markdown.ts b/blocksuite/affine/blocks/block-code/src/adapters/markdown/markdown.ts similarity index 100% rename from blocksuite/affine/blocks/block-code/src/adapters/markdown.ts rename to blocksuite/affine/blocks/block-code/src/adapters/markdown/markdown.ts diff --git a/blocksuite/affine/blocks/block-code/src/adapters/markdown/preprocessor.ts b/blocksuite/affine/blocks/block-code/src/adapters/markdown/preprocessor.ts new file mode 100644 index 0000000000..b5851a7768 --- /dev/null +++ b/blocksuite/affine/blocks/block-code/src/adapters/markdown/preprocessor.ts @@ -0,0 +1,76 @@ +import { + type MarkdownAdapterPreprocessor, + MarkdownPreprocessorExtension, +} from '@blocksuite/affine-shared/adapters'; + +const codePreprocessor: MarkdownAdapterPreprocessor = { + name: 'code', + levels: ['slice'], + preprocess: content => { + let codeFence = ''; + const lines = content + .split('\n') + .map(line => { + if (line.trimStart().startsWith('-')) { + return line; + } + let trimmedLine = line.trimStart(); + if (!codeFence && trimmedLine.startsWith('```')) { + codeFence = trimmedLine.substring( + 0, + trimmedLine.lastIndexOf('```') + 3 + ); + if (codeFence.split('').every(c => c === '`')) { + return line; + } + codeFence = ''; + } + if (!codeFence && trimmedLine.startsWith('~~~')) { + codeFence = trimmedLine.substring( + 0, + trimmedLine.lastIndexOf('~~~') + 3 + ); + if (codeFence.split('').every(c => c === '~')) { + return line; + } + codeFence = ''; + } + if ( + !!codeFence && + trimmedLine.startsWith(codeFence) && + trimmedLine.lastIndexOf(codeFence) === 0 + ) { + codeFence = ''; + } + if (codeFence) { + return line; + } + + trimmedLine = trimmedLine.trimEnd(); + if (!trimmedLine.startsWith('<') && !trimmedLine.endsWith('>')) { + // 
check if it is a url link and wrap it with the angle brackets + // sometimes the url includes emphasis `_` that will break URL parsing + // + // eg. /MuawcBMT1Mzvoar09-_66?mode=page&blockIds=rL2_GXbtLU2SsJVfCSmh_ + // https://www.markdownguide.org/basic-syntax/#urls-and-email-addresses + try { + const valid = + URL.canParse?.(trimmedLine) ?? Boolean(new URL(trimmedLine)); + if (valid) { + return `<${trimmedLine}>`; + } + } catch (err) { + console.log(err); + } + } + + return line.replace(/^ /, ' '); + }) + .join('\n'); + + return lines; + }, +}; + +export const CodeMarkdownPreprocessorExtension = + MarkdownPreprocessorExtension(codePreprocessor); diff --git a/blocksuite/affine/blocks/block-latex/src/adapters/extension.ts b/blocksuite/affine/blocks/block-latex/src/adapters/extension.ts index 7e7ca335a2..dcf76a0c2a 100644 --- a/blocksuite/affine/blocks/block-latex/src/adapters/extension.ts +++ b/blocksuite/affine/blocks/block-latex/src/adapters/extension.ts @@ -1,11 +1,11 @@ import type { ExtensionType } from '@blocksuite/store'; -import { LatexBlockMarkdownAdapterExtension } from './markdown.js'; +import { LatexMarkdownAdapterExtensions } from './markdown/index.js'; import { LatexBlockNotionHtmlAdapterExtension } from './notion-html.js'; import { LatexBlockPlainTextAdapterExtension } from './plain-text.js'; export const LatexBlockAdapterExtensions: ExtensionType[] = [ - LatexBlockMarkdownAdapterExtension, + LatexMarkdownAdapterExtensions, LatexBlockNotionHtmlAdapterExtension, LatexBlockPlainTextAdapterExtension, -]; +].flat(); diff --git a/blocksuite/affine/blocks/block-latex/src/adapters/index.ts b/blocksuite/affine/blocks/block-latex/src/adapters/index.ts index b30abd9352..457ae20cf9 100644 --- a/blocksuite/affine/blocks/block-latex/src/adapters/index.ts +++ b/blocksuite/affine/blocks/block-latex/src/adapters/index.ts @@ -1,3 +1,3 @@ -export * from './markdown.js'; +export * from './markdown/index.js'; export * from './notion-html.js'; export * from 
'./plain-text.js'; diff --git a/blocksuite/affine/blocks/block-latex/src/adapters/markdown/index.ts b/blocksuite/affine/blocks/block-latex/src/adapters/markdown/index.ts new file mode 100644 index 0000000000..cfe03c6493 --- /dev/null +++ b/blocksuite/affine/blocks/block-latex/src/adapters/markdown/index.ts @@ -0,0 +1,12 @@ +import type { ExtensionType } from '@blocksuite/store'; + +import { LatexBlockMarkdownAdapterExtension } from './markdown.js'; +import { LatexMarkdownPreprocessorExtension } from './preprocessor.js'; + +export * from './markdown.js'; +export * from './preprocessor.js'; + +export const LatexMarkdownAdapterExtensions: ExtensionType[] = [ + LatexMarkdownPreprocessorExtension, + LatexBlockMarkdownAdapterExtension, +]; diff --git a/blocksuite/affine/blocks/block-latex/src/adapters/markdown.ts b/blocksuite/affine/blocks/block-latex/src/adapters/markdown/markdown.ts similarity index 100% rename from blocksuite/affine/blocks/block-latex/src/adapters/markdown.ts rename to blocksuite/affine/blocks/block-latex/src/adapters/markdown/markdown.ts diff --git a/blocksuite/affine/blocks/block-latex/src/adapters/markdown/preprocessor.ts b/blocksuite/affine/blocks/block-latex/src/adapters/markdown/preprocessor.ts new file mode 100644 index 0000000000..1d6e7985f4 --- /dev/null +++ b/blocksuite/affine/blocks/block-latex/src/adapters/markdown/preprocessor.ts @@ -0,0 +1,25 @@ +import { + type MarkdownAdapterPreprocessor, + MarkdownPreprocessorExtension, +} from '@blocksuite/affine-shared/adapters'; + +const latexPreprocessor: MarkdownAdapterPreprocessor = { + name: 'latex', + levels: ['block', 'slice', 'doc'], + preprocess: content => { + // Replace block-level LaTeX delimiters \[ \] with $$ $$ + const blockProcessedContent = content.replace( + /\\\[(.*?)\\\]/gs, + (_, equation) => `$$${equation}$$` + ); + // Replace inline LaTeX delimiters \( \) with $ $ + const inlineProcessedContent = blockProcessedContent.replace( + /\\\((.*?)\\\)/gs, + (_, equation) => 
`$${equation}$` + ); + return inlineProcessedContent; + }, +}; + +export const LatexMarkdownPreprocessorExtension = + MarkdownPreprocessorExtension(latexPreprocessor); diff --git a/blocksuite/affine/shared/src/adapters/html/delta-converter.ts b/blocksuite/affine/shared/src/adapters/html/delta-converter.ts index 6b5c6d9308..d894aaadc6 100644 --- a/blocksuite/affine/shared/src/adapters/html/delta-converter.ts +++ b/blocksuite/affine/shared/src/adapters/html/delta-converter.ts @@ -11,7 +11,7 @@ import { DeltaASTConverter, type DeltaASTConverterOptions, type InlineDeltaMatcher, -} from '../types/adapter.js'; +} from '../types/delta-converter.js'; import type { HtmlAST, InlineHtmlAST } from '../types/hast.js'; import { AdapterTextUtils } from '../utils/text.js'; diff --git a/blocksuite/affine/shared/src/adapters/index.ts b/blocksuite/affine/shared/src/adapters/index.ts index 5b50f4a972..016ee0c3c5 100644 --- a/blocksuite/affine/shared/src/adapters/index.ts +++ b/blocksuite/affine/shared/src/adapters/index.ts @@ -30,11 +30,14 @@ export { MarkdownAdapter, MarkdownAdapterFactoryExtension, MarkdownAdapterFactoryIdentifier, + type MarkdownAdapterPreprocessor, type MarkdownAST, MarkdownASTToDeltaExtension, type MarkdownASTToDeltaMatcher, MarkdownASTToDeltaMatcherIdentifier, MarkdownDeltaConverter, + MarkdownPreprocessorExtension, + MarkdownPreprocessorManager, } from './markdown'; export * from './middlewares'; export * from './mix-text'; diff --git a/blocksuite/affine/shared/src/adapters/markdown/delta-converter.ts b/blocksuite/affine/shared/src/adapters/markdown/delta-converter.ts index 3dd10e6491..becb8f91e4 100644 --- a/blocksuite/affine/shared/src/adapters/markdown/delta-converter.ts +++ b/blocksuite/affine/shared/src/adapters/markdown/delta-converter.ts @@ -10,7 +10,7 @@ import { type ASTToDeltaMatcher, DeltaASTConverter, type InlineDeltaMatcher, -} from '../types/adapter.js'; +} from '../types/delta-converter.js'; import type { MarkdownAST } from './type.js'; export 
type InlineDeltaToMarkdownAdapterMatcher = diff --git a/blocksuite/affine/shared/src/adapters/markdown/index.ts b/blocksuite/affine/shared/src/adapters/markdown/index.ts index 31a591041a..07cd5127c9 100644 --- a/blocksuite/affine/shared/src/adapters/markdown/index.ts +++ b/blocksuite/affine/shared/src/adapters/markdown/index.ts @@ -1,4 +1,5 @@ export * from './block-adapter.js'; export * from './delta-converter.js'; export * from './markdown.js'; +export * from './preprocessor.js'; export * from './type.js'; diff --git a/blocksuite/affine/shared/src/adapters/markdown/markdown.ts b/blocksuite/affine/shared/src/adapters/markdown/markdown.ts index 895dceec94..1788bd8940 100644 --- a/blocksuite/affine/shared/src/adapters/markdown/markdown.ts +++ b/blocksuite/affine/shared/src/adapters/markdown/markdown.ts @@ -37,6 +37,7 @@ import { MarkdownDeltaConverter, } from './delta-converter'; import { remarkGfm } from './gfm'; +import { MarkdownPreprocessorManager } from './preprocessor'; import type { Markdown, MarkdownAST } from './type'; type MarkdownToSliceSnapshotPayload = { @@ -167,6 +168,7 @@ export class MarkdownAdapter extends BaseAdapter { }; deltaConverter: MarkdownDeltaConverter; + preprocessorManager: MarkdownPreprocessorManager; readonly blockMatchers: BlockMarkdownAdapterMatcher[]; @@ -187,6 +189,7 @@ export class MarkdownAdapter extends BaseAdapter { inlineDeltaToMarkdownAdapterMatchers, markdownInlineToDeltaMatchers ); + this.preprocessorManager = new MarkdownPreprocessorManager(provider); } private _astToMarkdown(ast: Root) { @@ -273,7 +276,11 @@ export class MarkdownAdapter extends BaseAdapter { async toBlockSnapshot( payload: ToBlockSnapshotPayload ): Promise { - const markdownAst = this._markdownToAst(payload.file); + const markdownFile = this.preprocessorManager.process( + 'block', + payload.file + ); + const markdownAst = this._markdownToAst(markdownFile); const blockSnapshotRoot = { type: 'block', id: nanoid(), @@ -297,7 +304,8 @@ export class 
MarkdownAdapter extends BaseAdapter { async toDocSnapshot( payload: ToDocSnapshotPayload ): Promise { - const markdownAst = this._markdownToAst(payload.file); + const markdownFile = this.preprocessorManager.process('doc', payload.file); + const markdownAst = this._markdownToAst(markdownFile); const blockSnapshotRoot = { type: 'block', id: nanoid(), @@ -356,67 +364,11 @@ export class MarkdownAdapter extends BaseAdapter { async toSliceSnapshot( payload: MarkdownToSliceSnapshotPayload ): Promise { - let codeFence = ''; - payload.file = payload.file - .split('\n') - .map(line => { - if (line.trimStart().startsWith('-')) { - return line; - } - let trimmedLine = line.trimStart(); - if (!codeFence && trimmedLine.startsWith('```')) { - codeFence = trimmedLine.substring( - 0, - trimmedLine.lastIndexOf('```') + 3 - ); - if (codeFence.split('').every(c => c === '`')) { - return line; - } - codeFence = ''; - } - if (!codeFence && trimmedLine.startsWith('~~~')) { - codeFence = trimmedLine.substring( - 0, - trimmedLine.lastIndexOf('~~~') + 3 - ); - if (codeFence.split('').every(c => c === '~')) { - return line; - } - codeFence = ''; - } - if ( - !!codeFence && - trimmedLine.startsWith(codeFence) && - trimmedLine.lastIndexOf(codeFence) === 0 - ) { - codeFence = ''; - } - if (codeFence) { - return line; - } - - trimmedLine = trimmedLine.trimEnd(); - if (!trimmedLine.startsWith('<') && !trimmedLine.endsWith('>')) { - // check if it is a url link and wrap it with the angle brackets - // sometimes the url includes emphasis `_` that will break URL parsing - // - // eg. /MuawcBMT1Mzvoar09-_66?mode=page&blockIds=rL2_GXbtLU2SsJVfCSmh_ - // https://www.markdownguide.org/basic-syntax/#urls-and-email-addresses - try { - const valid = - URL.canParse?.(trimmedLine) ?? 
Boolean(new URL(trimmedLine)); - if (valid) { - return `<${trimmedLine}>`; - } - } catch (err) { - console.log(err); - } - } - - return line.replace(/^ /, ' '); - }) - .join('\n'); - const markdownAst = this._markdownToAst(payload.file); + const markdownFile = this.preprocessorManager.process( + 'slice', + payload.file + ); + const markdownAst = this._markdownToAst(markdownFile); const blockSnapshotRoot = { type: 'block', id: nanoid(), diff --git a/blocksuite/affine/shared/src/adapters/markdown/preprocessor.ts b/blocksuite/affine/shared/src/adapters/markdown/preprocessor.ts new file mode 100644 index 0000000000..2d99f7d86f --- /dev/null +++ b/blocksuite/affine/shared/src/adapters/markdown/preprocessor.ts @@ -0,0 +1,40 @@ +import { + createIdentifier, + type ServiceIdentifier, + type ServiceProvider, +} from '@blocksuite/global/di'; +import type { ExtensionType } from '@blocksuite/store'; + +import { + type AdapterPreprocessor, + PreprocessorManager, +} from '../types/preprocessor'; +import type { Markdown } from './type'; + +export type MarkdownAdapterPreprocessor = AdapterPreprocessor; + +const MarkdownPreprocessorIdentifier = + createIdentifier('MarkdownPreprocessor'); + +export const MarkdownPreprocessorExtension = ( + preprocessor: MarkdownAdapterPreprocessor +): ExtensionType & { + identifier: ServiceIdentifier; +} => { + const identifier = MarkdownPreprocessorIdentifier(preprocessor.name); + return { + setup: di => { + di.addImpl(identifier, () => preprocessor); + }, + identifier, + }; +}; + +export class MarkdownPreprocessorManager extends PreprocessorManager< + Markdown, + MarkdownAdapterPreprocessor +> { + constructor(provider: ServiceProvider) { + super(provider, MarkdownPreprocessorIdentifier); + } +} diff --git a/blocksuite/affine/shared/src/adapters/notion-html/delta-converter.ts b/blocksuite/affine/shared/src/adapters/notion-html/delta-converter.ts index 079ca72881..dc378efff7 100644 --- 
a/blocksuite/affine/shared/src/adapters/notion-html/delta-converter.ts +++ b/blocksuite/affine/shared/src/adapters/notion-html/delta-converter.ts @@ -11,7 +11,7 @@ import { DeltaASTConverter, type DeltaASTConverterOptions, type InlineDeltaMatcher, -} from '../types/adapter.js'; +} from '../types/delta-converter.js'; import type { HtmlAST, InlineHtmlAST } from '../types/hast.js'; export type InlineDeltaToNotionHtmlAdapterMatcher = diff --git a/blocksuite/affine/shared/src/adapters/plain-text/delta-converter.ts b/blocksuite/affine/shared/src/adapters/plain-text/delta-converter.ts index 2d60a528d1..1b398f29b1 100644 --- a/blocksuite/affine/shared/src/adapters/plain-text/delta-converter.ts +++ b/blocksuite/affine/shared/src/adapters/plain-text/delta-converter.ts @@ -5,12 +5,12 @@ import { import type { DeltaInsert, ExtensionType } from '@blocksuite/store'; import type { AffineTextAttributes } from '../../types/index.js'; +import type { TextBuffer } from '../types/adapter.js'; import { type ASTToDeltaMatcher, DeltaASTConverter, type InlineDeltaMatcher, - type TextBuffer, -} from '../types/adapter.js'; +} from '../types/delta-converter.js'; export type InlineDeltaToPlainTextAdapterMatcher = InlineDeltaMatcher; diff --git a/blocksuite/affine/shared/src/adapters/types/adapter.ts b/blocksuite/affine/shared/src/adapters/types/adapter.ts index 69d7ad4b1d..0f2bc3b1f1 100644 --- a/blocksuite/affine/shared/src/adapters/types/adapter.ts +++ b/blocksuite/affine/shared/src/adapters/types/adapter.ts @@ -4,15 +4,13 @@ import { type ASTWalker, type ASTWalkerContext, type BaseAdapter, - type BaseTextAttributes, type BlockSnapshot, BlockSnapshotSchema, - type DeltaInsert, type NodeProps, type Transformer, } from '@blocksuite/store'; -import type { AffineTextAttributes } from '../../types/index.js'; +import type { DeltaASTConverter } from './delta-converter.js'; export const isBlockSnapshotNode = (node: unknown): node is BlockSnapshot => BlockSnapshotSchema.safeParse(node).success; @@ 
-21,13 +19,6 @@ export type TextBuffer = { content: string; }; -export type DeltaASTConverterOptions = { - trim?: boolean; - pre?: boolean; - pageMap?: Map; - removeLastBr?: boolean; -}; - export type AdapterContext< ONode extends object, TNode extends object = never, @@ -124,56 +115,6 @@ export type BlockAdapterMatcher< }; }; -export abstract class DeltaASTConverter< - TextAttributes extends BaseTextAttributes = BaseTextAttributes, - AST = unknown, -> { - /** - * Convert AST format to delta format - */ - abstract astToDelta( - ast: AST, - options?: unknown - ): DeltaInsert[]; - - /** - * Convert delta format to AST format - */ - abstract deltaToAST( - deltas: DeltaInsert[], - options?: unknown - ): AST[]; -} - -export type InlineDeltaMatcher = { - name: keyof AffineTextAttributes | string; - match: (delta: DeltaInsert) => boolean; - toAST: ( - delta: DeltaInsert, - context: { - configs: Map; - current: TNode; - }, - provider?: ServiceProvider - ) => TNode; -}; - -export type ASTToDeltaMatcher = { - name: string; - match: (ast: AST) => boolean; - toDelta: ( - ast: AST, - context: { - configs: Map; - options: DeltaASTConverterOptions; - toDelta: ( - ast: AST, - options?: DeltaASTConverterOptions - ) => DeltaInsert[]; - } - ) => DeltaInsert[]; -}; - export type AdapterFactory = { // TODO(@chen): Make it return the specific adapter type get: (job: Transformer) => BaseAdapter; diff --git a/blocksuite/affine/shared/src/adapters/types/delta-converter.ts b/blocksuite/affine/shared/src/adapters/types/delta-converter.ts new file mode 100644 index 0000000000..1ae39a1b6a --- /dev/null +++ b/blocksuite/affine/shared/src/adapters/types/delta-converter.ts @@ -0,0 +1,61 @@ +import type { ServiceProvider } from '@blocksuite/global/di'; +import type { BaseTextAttributes, DeltaInsert } from '@blocksuite/store'; + +import type { AffineTextAttributes } from '../../types'; + +export type DeltaASTConverterOptions = { + trim?: boolean; + pre?: boolean; + pageMap?: Map; + removeLastBr?: 
boolean; +}; + +export abstract class DeltaASTConverter< + TextAttributes extends BaseTextAttributes = BaseTextAttributes, + AST = unknown, +> { + /** + * Convert AST format to delta format + */ + abstract astToDelta( + ast: AST, + options?: unknown + ): DeltaInsert[]; + + /** + * Convert delta format to AST format + */ + abstract deltaToAST( + deltas: DeltaInsert[], + options?: unknown + ): AST[]; +} + +export type InlineDeltaMatcher = { + name: keyof AffineTextAttributes | string; + match: (delta: DeltaInsert) => boolean; + toAST: ( + delta: DeltaInsert, + context: { + configs: Map; + current: TNode; + }, + provider?: ServiceProvider + ) => TNode; +}; + +export type ASTToDeltaMatcher = { + name: string; + match: (ast: AST) => boolean; + toDelta: ( + ast: AST, + context: { + configs: Map; + options: DeltaASTConverterOptions; + toDelta: ( + ast: AST, + options?: DeltaASTConverterOptions + ) => DeltaInsert[]; + } + ) => DeltaInsert[]; +}; diff --git a/blocksuite/affine/shared/src/adapters/types/index.ts b/blocksuite/affine/shared/src/adapters/types/index.ts index 8d2beb1f23..fe3005d099 100644 --- a/blocksuite/affine/shared/src/adapters/types/index.ts +++ b/blocksuite/affine/shared/src/adapters/types/index.ts @@ -1,2 +1,4 @@ export * from './adapter.js'; +export * from './delta-converter.js'; export * from './hast.js'; +export * from './preprocessor.js'; diff --git a/blocksuite/affine/shared/src/adapters/types/preprocessor.ts b/blocksuite/affine/shared/src/adapters/types/preprocessor.ts new file mode 100644 index 0000000000..58aa8a6b65 --- /dev/null +++ b/blocksuite/affine/shared/src/adapters/types/preprocessor.ts @@ -0,0 +1,88 @@ +import type { ServiceIdentifier, ServiceProvider } from '@blocksuite/global/di'; + +/** + * Level of preprocessing + * - doc: Applied when converting to a doc snapshot + * - slice: Applied when converting to a slice snapshot + * - block: Applied when converting to a block snapshot + */ +export type PreprocessLevel = 'doc' | 'slice' | 'block'; + +/** + * Interface
for adapter preprocessor + * @template T Type of content to process, defaults to string + */ +export type AdapterPreprocessor = { + /** + * Unique name of the preprocessor + */ + name: string; + + /** + * Levels this preprocessor supports + */ + levels: PreprocessLevel[]; + + /** + * Process the content + * @param content Content to process + * @returns Processed content + */ + preprocess: (content: T) => T; +}; + +/** + * Manager class for handling preprocessors + * @template T Type of content to process + * @template P Type of preprocessor + */ +export abstract class PreprocessorManager> { + protected readonly preprocessors: Map>; + + constructor( + protected readonly provider: ServiceProvider, + protected readonly identifier: ServiceIdentifier

+ ) { + this.preprocessors = new Map(); + + // Initialize Sets for each level + this.preprocessors.set('doc', new Set()); + this.preprocessors.set('slice', new Set()); + this.preprocessors.set('block', new Set()); + + // Register all preprocessors from provider + this.initializePreprocessors(); + } + + /** + * Initialize preprocessors from provider + */ + private initializePreprocessors(): void { + const preprocessors = Array.from( + this.provider.getAll(this.identifier).values() + ); + + for (const preprocessor of preprocessors) { + for (const level of preprocessor.levels) { + const levelSet = this.preprocessors.get(level); + if (levelSet) { + levelSet.add(preprocessor); + } + } + } + } + + /** + * Preprocess content at the specified level + * @param level Level to process at + * @param content Content to process + * @returns Processed content + */ + process(level: PreprocessLevel, content: T): T { + const processors = this.preprocessors.get(level) ?? new Set(); + return Array.from(processors).reduce( + (result, preprocessor) => preprocessor.preprocess(result), + content + ); + } +} diff --git a/packages/common/nbstore/src/sync/indexer/crawler.ts b/packages/common/nbstore/src/sync/indexer/crawler.ts index 348c3613f5..7afa3966e6 100644 --- a/packages/common/nbstore/src/sync/indexer/crawler.ts +++ b/packages/common/nbstore/src/sync/indexer/crawler.ts @@ -1,9 +1,5 @@ -import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; +import { MarkdownAdapterExtension } from '@blocksuite/affine/adapters'; import { Container } from '@blocksuite/affine/global/di'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; import type { AttachmentBlockModel, BookmarkBlockModel, @@ -123,11 +119,7 @@ function generateMarkdownPreviewBuilder( }; const container = new Container(); - [ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, -
...InlineDeltaToMarkdownAdapterExtensions, - ].forEach(ext => { + [...MarkdownAdapterExtension].forEach(ext => { ext.setup(container); }); diff --git a/packages/frontend/apps/android/src/app.tsx b/packages/frontend/apps/android/src/app.tsx index 14585ae706..d05daf4237 100644 --- a/packages/frontend/apps/android/src/app.tsx +++ b/packages/frontend/apps/android/src/app.tsx @@ -26,12 +26,8 @@ import { configureBrowserWorkspaceFlavours } from '@affine/core/modules/workspac import { getWorkerUrl } from '@affine/env/worker'; import { I18n } from '@affine/i18n'; import { StoreManagerClient } from '@affine/nbstore/worker/client'; -import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; +import { MarkdownAdapterExtension } from '@blocksuite/affine/adapters'; import { Container } from '@blocksuite/affine/global/di'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; import { docLinkBaseURLMiddleware, MarkdownAdapter, @@ -203,11 +199,7 @@ framework.impl(AIButtonProvider, { const snapshot = transformer.docToSnapshot(blockSuiteDoc); const container = new Container(); - [ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, - ...InlineDeltaToMarkdownAdapterExtensions, - ].forEach(ext => { + [...MarkdownAdapterExtension].forEach(ext => { ext.setup(container); }); const provider = container.provider(); diff --git a/packages/frontend/apps/ios/src/app.tsx b/packages/frontend/apps/ios/src/app.tsx index 5abfbac735..1a28ad8f18 100644 --- a/packages/frontend/apps/ios/src/app.tsx +++ b/packages/frontend/apps/ios/src/app.tsx @@ -38,13 +38,9 @@ import { configureBrowserWorkspaceFlavours } from '@affine/core/modules/workspac import { getWorkerUrl } from '@affine/env/worker'; import { I18n } from '@affine/i18n'; import { StoreManagerClient } from '@affine/nbstore/worker/client'; -import { defaultBlockMarkdownAdapterMatchers } from 
'@blocksuite/affine/adapters'; +import { MarkdownAdapterExtension } from '@blocksuite/affine/adapters'; import { MarkdownTransformer } from '@blocksuite/affine/blocks/root'; import { Container } from '@blocksuite/affine/global/di'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; import { docLinkBaseURLMiddleware, MarkdownAdapter, @@ -266,11 +262,7 @@ const frameworkProvider = framework.provider(); const snapshot = transformer.docToSnapshot(blockSuiteDoc); const container = new Container(); - [ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, - ...InlineDeltaToMarkdownAdapterExtensions, - ].forEach(ext => { + [...MarkdownAdapterExtension].forEach(ext => { ext.setup(container); }); const provider = container.provider(); diff --git a/packages/frontend/core/src/blocksuite/ai/components/text-renderer.ts b/packages/frontend/core/src/blocksuite/ai/components/text-renderer.ts index eea45fe589..7ffeb18f57 100644 --- a/packages/frontend/core/src/blocksuite/ai/components/text-renderer.ts +++ b/packages/frontend/core/src/blocksuite/ai/components/text-renderer.ts @@ -1,5 +1,5 @@ import { createReactComponentFromLit } from '@affine/component'; -import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; +import { MarkdownAdapterExtension } from '@blocksuite/affine/adapters'; import { defaultImageProxyMiddleware, ImageProxyService, @@ -7,10 +7,6 @@ import { import { PageEditorBlockSpecs } from '@blocksuite/affine/extensions'; import { Container, type ServiceProvider } from '@blocksuite/affine/global/di'; import { WithDisposable } from '@blocksuite/affine/global/lit'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; import { codeBlockWrapMiddleware } from '@blocksuite/affine/shared/adapters'; import { LinkPreviewerService } from 
'@blocksuite/affine/shared/services'; import { @@ -215,11 +211,7 @@ export class TextRenderer extends WithDisposable(ShadowlessElement) { provider = this.host.std.provider; } else { const container = new Container(); - [ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, - ...InlineDeltaToMarkdownAdapterExtensions, - ].forEach(ext => { + [...MarkdownAdapterExtension].forEach(ext => { ext.setup(container); }); diff --git a/packages/frontend/core/src/blocksuite/ai/mini-mindmap/__tests__/mindmap-preview.unit.spec.ts b/packages/frontend/core/src/blocksuite/ai/mini-mindmap/__tests__/mindmap-preview.unit.spec.ts index 0e17214677..2fe19bb3cd 100644 --- a/packages/frontend/core/src/blocksuite/ai/mini-mindmap/__tests__/mindmap-preview.unit.spec.ts +++ b/packages/frontend/core/src/blocksuite/ai/mini-mindmap/__tests__/mindmap-preview.unit.spec.ts @@ -1,20 +1,12 @@ -import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; +import { MarkdownAdapterExtension } from '@blocksuite/affine/adapters'; import { Container } from '@blocksuite/affine/global/di'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; import { TestWorkspace } from '@blocksuite/affine/store/test'; import { describe, expect, test } from 'vitest'; import { markdownToMindmap } from '../mindmap-preview.js'; const container = new Container(); -[ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, - ...InlineDeltaToMarkdownAdapterExtensions, -].forEach(ext => { +[...MarkdownAdapterExtension].forEach(ext => { ext.setup(container); }); const provider = container.provider();