feat(editor): support markdown adapter preprocessed with latex delimiters (#11431)

To close [BS-2870](https://linear.app/affine-design/issue/BS-2870/支持识别-和-[-包裹内容为公式)

## Add Markdown Preprocessor Extension and Enhanced LaTeX Support

### Markdown Preprocessor Extension
This PR introduces a new preprocessor extension for Markdown adapters that allows preprocessing of content before conversion:

- Adds a new `PreprocessorManager` for handling text transformations
- Introduces an extensible preprocessor interface that supports different processing levels (block/slice/doc)
- Integrates the preprocessor extension into the existing Markdown adapter workflow

### LaTeX Support Enhancement
Extends LaTeX support to handle both the traditional dollar-sign syntax and the alternative backslash syntax.
Adds support for backslash LaTeX syntax:

- Block math: ```\[...\]``` alongside existing ```$$...$$```
- Inline math: ```\(...\)``` alongside existing ```$...$```

- Implements a LaTeX preprocessor to standardize the syntax before conversion
- Updates tests to cover both syntax variants
This commit is contained in:
donteatfriedrice
2025-04-07 02:18:04 +00:00
parent e376992ccf
commit 568a390b75
31 changed files with 450 additions and 260 deletions

View File

@@ -11,7 +11,7 @@ import {
DeltaASTConverter,
type DeltaASTConverterOptions,
type InlineDeltaMatcher,
} from '../types/adapter.js';
} from '../types/delta-converter.js';
import type { HtmlAST, InlineHtmlAST } from '../types/hast.js';
import { AdapterTextUtils } from '../utils/text.js';

View File

@@ -30,11 +30,14 @@ export {
MarkdownAdapter,
MarkdownAdapterFactoryExtension,
MarkdownAdapterFactoryIdentifier,
type MarkdownAdapterPreprocessor,
type MarkdownAST,
MarkdownASTToDeltaExtension,
type MarkdownASTToDeltaMatcher,
MarkdownASTToDeltaMatcherIdentifier,
MarkdownDeltaConverter,
MarkdownPreprocessorExtension,
MarkdownPreprocessorManager,
} from './markdown';
export * from './middlewares';
export * from './mix-text';

View File

@@ -10,7 +10,7 @@ import {
type ASTToDeltaMatcher,
DeltaASTConverter,
type InlineDeltaMatcher,
} from '../types/adapter.js';
} from '../types/delta-converter.js';
import type { MarkdownAST } from './type.js';
export type InlineDeltaToMarkdownAdapterMatcher =

View File

@@ -1,4 +1,5 @@
export * from './block-adapter.js';
export * from './delta-converter.js';
export * from './markdown.js';
export * from './preprocessor.js';
export * from './type.js';

View File

@@ -37,6 +37,7 @@ import {
MarkdownDeltaConverter,
} from './delta-converter';
import { remarkGfm } from './gfm';
import { MarkdownPreprocessorManager } from './preprocessor';
import type { Markdown, MarkdownAST } from './type';
type MarkdownToSliceSnapshotPayload = {
@@ -167,6 +168,7 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
};
deltaConverter: MarkdownDeltaConverter;
preprocessorManager: MarkdownPreprocessorManager;
readonly blockMatchers: BlockMarkdownAdapterMatcher[];
@@ -187,6 +189,7 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
inlineDeltaToMarkdownAdapterMatchers,
markdownInlineToDeltaMatchers
);
this.preprocessorManager = new MarkdownPreprocessorManager(provider);
}
private _astToMarkdown(ast: Root) {
@@ -273,7 +276,11 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
async toBlockSnapshot(
payload: ToBlockSnapshotPayload<Markdown>
): Promise<BlockSnapshot> {
const markdownAst = this._markdownToAst(payload.file);
const markdownFile = this.preprocessorManager.process(
'block',
payload.file
);
const markdownAst = this._markdownToAst(markdownFile);
const blockSnapshotRoot = {
type: 'block',
id: nanoid(),
@@ -297,7 +304,8 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
async toDocSnapshot(
payload: ToDocSnapshotPayload<Markdown>
): Promise<DocSnapshot> {
const markdownAst = this._markdownToAst(payload.file);
const markdownFile = this.preprocessorManager.process('doc', payload.file);
const markdownAst = this._markdownToAst(markdownFile);
const blockSnapshotRoot = {
type: 'block',
id: nanoid(),
@@ -356,67 +364,11 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
async toSliceSnapshot(
payload: MarkdownToSliceSnapshotPayload
): Promise<SliceSnapshot | null> {
let codeFence = '';
payload.file = payload.file
.split('\n')
.map(line => {
if (line.trimStart().startsWith('-')) {
return line;
}
let trimmedLine = line.trimStart();
if (!codeFence && trimmedLine.startsWith('```')) {
codeFence = trimmedLine.substring(
0,
trimmedLine.lastIndexOf('```') + 3
);
if (codeFence.split('').every(c => c === '`')) {
return line;
}
codeFence = '';
}
if (!codeFence && trimmedLine.startsWith('~~~')) {
codeFence = trimmedLine.substring(
0,
trimmedLine.lastIndexOf('~~~') + 3
);
if (codeFence.split('').every(c => c === '~')) {
return line;
}
codeFence = '';
}
if (
!!codeFence &&
trimmedLine.startsWith(codeFence) &&
trimmedLine.lastIndexOf(codeFence) === 0
) {
codeFence = '';
}
if (codeFence) {
return line;
}
trimmedLine = trimmedLine.trimEnd();
if (!trimmedLine.startsWith('<') && !trimmedLine.endsWith('>')) {
// check if it is a url link and wrap it with the angle brackets
// sometimes the url includes emphasis `_` that will break URL parsing
//
// eg. /MuawcBMT1Mzvoar09-_66?mode=page&blockIds=rL2_GXbtLU2SsJVfCSmh_
// https://www.markdownguide.org/basic-syntax/#urls-and-email-addresses
try {
const valid =
URL.canParse?.(trimmedLine) ?? Boolean(new URL(trimmedLine));
if (valid) {
return `<${trimmedLine}>`;
}
} catch (err) {
console.log(err);
}
}
return line.replace(/^ /, '&#x20;');
})
.join('\n');
const markdownAst = this._markdownToAst(payload.file);
const markdownFile = this.preprocessorManager.process(
'slice',
payload.file
);
const markdownAst = this._markdownToAst(markdownFile);
const blockSnapshotRoot = {
type: 'block',
id: nanoid(),

View File

@@ -0,0 +1,40 @@
import {
createIdentifier,
type ServiceIdentifier,
type ServiceProvider,
} from '@blocksuite/global/di';
import type { ExtensionType } from '@blocksuite/store';
import {
type AdapterPreprocessor,
PreprocessorManager,
} from '../types/preprocessor';
import type { Markdown } from './type';
/**
 * Preprocessor whose content type is Markdown.
 */
export type MarkdownAdapterPreprocessor = AdapterPreprocessor<Markdown>;

/**
 * Identifier under which Markdown preprocessors are registered in the
 * DI container.
 */
const MarkdownPreprocessorIdentifier =
  createIdentifier<MarkdownAdapterPreprocessor>('MarkdownPreprocessor');

/**
 * Wraps a Markdown preprocessor in an extension.
 *
 * The extension's identifier is derived from the preprocessor's `name`, and
 * its `setup` registers a factory for that identifier that returns the given
 * preprocessor instance.
 *
 * @param preprocessor Preprocessor to expose through the extension
 * @returns The extension together with the identifier it registers under
 */
export const MarkdownPreprocessorExtension = (
  preprocessor: MarkdownAdapterPreprocessor
): ExtensionType & {
  identifier: ServiceIdentifier<MarkdownAdapterPreprocessor>;
} => {
  const namedIdentifier = MarkdownPreprocessorIdentifier(preprocessor.name);
  const providePreprocessor = () => preprocessor;

  return {
    setup(container) {
      container.addImpl(namedIdentifier, providePreprocessor);
    },
    identifier: namedIdentifier,
  };
};
/**
* Preprocessor manager specialised for Markdown content.
*
* Adds no behaviour of its own: it only binds the generic
* PreprocessorManager to the Markdown content type and to the Markdown
* preprocessor identifier.
*/
export class MarkdownPreprocessorManager extends PreprocessorManager<
Markdown,
MarkdownAdapterPreprocessor
> {
/**
* @param provider Service provider handed to the base manager together with
* the Markdown preprocessor identifier
*/
constructor(provider: ServiceProvider) {
super(provider, MarkdownPreprocessorIdentifier);
}
}

View File

@@ -11,7 +11,7 @@ import {
DeltaASTConverter,
type DeltaASTConverterOptions,
type InlineDeltaMatcher,
} from '../types/adapter.js';
} from '../types/delta-converter.js';
import type { HtmlAST, InlineHtmlAST } from '../types/hast.js';
export type InlineDeltaToNotionHtmlAdapterMatcher =

View File

@@ -5,12 +5,12 @@ import {
import type { DeltaInsert, ExtensionType } from '@blocksuite/store';
import type { AffineTextAttributes } from '../../types/index.js';
import type { TextBuffer } from '../types/adapter.js';
import {
type ASTToDeltaMatcher,
DeltaASTConverter,
type InlineDeltaMatcher,
type TextBuffer,
} from '../types/adapter.js';
} from '../types/delta-converter.js';
export type InlineDeltaToPlainTextAdapterMatcher =
InlineDeltaMatcher<TextBuffer>;

View File

@@ -4,15 +4,13 @@ import {
type ASTWalker,
type ASTWalkerContext,
type BaseAdapter,
type BaseTextAttributes,
type BlockSnapshot,
BlockSnapshotSchema,
type DeltaInsert,
type NodeProps,
type Transformer,
} from '@blocksuite/store';
import type { AffineTextAttributes } from '../../types/index.js';
import type { DeltaASTConverter } from './delta-converter.js';
export const isBlockSnapshotNode = (node: unknown): node is BlockSnapshot =>
BlockSnapshotSchema.safeParse(node).success;
@@ -21,13 +19,6 @@ export type TextBuffer = {
content: string;
};
export type DeltaASTConverterOptions = {
trim?: boolean;
pre?: boolean;
pageMap?: Map<string, string>;
removeLastBr?: boolean;
};
export type AdapterContext<
ONode extends object,
TNode extends object = never,
@@ -124,56 +115,6 @@ export type BlockAdapterMatcher<
};
};
export abstract class DeltaASTConverter<
TextAttributes extends BaseTextAttributes = BaseTextAttributes,
AST = unknown,
> {
/**
* Convert AST format to delta format
*/
abstract astToDelta(
ast: AST,
options?: unknown
): DeltaInsert<TextAttributes>[];
/**
* Convert delta format to AST format
*/
abstract deltaToAST(
deltas: DeltaInsert<TextAttributes>[],
options?: unknown
): AST[];
}
export type InlineDeltaMatcher<TNode extends object = never> = {
name: keyof AffineTextAttributes | string;
match: (delta: DeltaInsert<AffineTextAttributes>) => boolean;
toAST: (
delta: DeltaInsert<AffineTextAttributes>,
context: {
configs: Map<string, string>;
current: TNode;
},
provider?: ServiceProvider
) => TNode;
};
export type ASTToDeltaMatcher<AST> = {
name: string;
match: (ast: AST) => boolean;
toDelta: (
ast: AST,
context: {
configs: Map<string, string>;
options: DeltaASTConverterOptions;
toDelta: (
ast: AST,
options?: DeltaASTConverterOptions
) => DeltaInsert<AffineTextAttributes>[];
}
) => DeltaInsert<AffineTextAttributes>[];
};
export type AdapterFactory = {
// TODO(@chen): Make it return the specific adapter type
get: (job: Transformer) => BaseAdapter;

View File

@@ -0,0 +1,61 @@
import type { ServiceProvider } from '@blocksuite/global/di';
import type { BaseTextAttributes, DeltaInsert } from '@blocksuite/store';
import type { AffineTextAttributes } from '../../types';
/**
* Options passed to delta/AST converters and their matchers.
* Every field is optional.
*
* NOTE(review): the exact effect of each flag is decided by the converter
* implementations, which are not part of this file — confirm there before
* relying on the hedged descriptions below.
*/
export type DeltaASTConverterOptions = {
// presumably: trim whitespace of converted text — TODO confirm
trim?: boolean;
// presumably: content is preformatted (e.g. inside <pre>) — TODO confirm
pre?: boolean;
// string-to-string map; presumably page id -> page title/name — TODO confirm
pageMap?: Map<string, string>;
// presumably: drop a trailing line break from the result — TODO confirm
removeLastBr?: boolean;
};
/**
* Abstract contract for converting between an AST representation and
* delta inserts, in both directions.
*
* @template TextAttributes Attribute type carried by each delta insert,
* defaults to BaseTextAttributes
* @template AST AST node type, defaults to unknown
*/
export abstract class DeltaASTConverter<
TextAttributes extends BaseTextAttributes = BaseTextAttributes,
AST = unknown,
> {
/**
* Convert AST format to delta format
* @param ast AST node to convert
* @param options Converter-specific options; typed as unknown at this level
* @returns Delta inserts produced from the AST
*/
abstract astToDelta(
ast: AST,
options?: unknown
): DeltaInsert<TextAttributes>[];
/**
* Convert delta format to AST format
* @param deltas Delta inserts to convert
* @param options Converter-specific options; typed as unknown at this level
* @returns AST nodes produced from the deltas
*/
abstract deltaToAST(
deltas: DeltaInsert<TextAttributes>[],
options?: unknown
): AST[];
}
/**
* Matcher that turns a single delta insert into an AST node.
*
* @template TNode AST node type produced by the matcher
*/
export type InlineDeltaMatcher<TNode extends object = never> = {
// Matcher name: either a key of AffineTextAttributes or any other string.
name: keyof AffineTextAttributes | string;
// Returns whether this matcher applies to the given delta.
match: (delta: DeltaInsert<AffineTextAttributes>) => boolean;
// Builds the AST node for the delta.
// `context.current` is a TNode supplied by the caller — presumably the node
// built so far for this delta; TODO confirm against the converters.
// `provider` is optional, so implementations must cope with it being absent.
toAST: (
delta: DeltaInsert<AffineTextAttributes>,
context: {
configs: Map<string, string>;
current: TNode;
},
provider?: ServiceProvider
) => TNode;
};
/**
* Matcher that turns an AST node into delta inserts.
*
* @template AST AST node type consumed by the matcher
*/
export type ASTToDeltaMatcher<AST> = {
// Matcher name.
name: string;
// Returns whether this matcher applies to the given AST node.
match: (ast: AST) => boolean;
// Converts the AST node into delta inserts.
// `context.toDelta` has the shape of an AST-to-delta conversion itself —
// presumably so a matcher can convert child nodes; TODO confirm.
toDelta: (
ast: AST,
context: {
configs: Map<string, string>;
options: DeltaASTConverterOptions;
toDelta: (
ast: AST,
options?: DeltaASTConverterOptions
) => DeltaInsert<AffineTextAttributes>[];
}
) => DeltaInsert<AffineTextAttributes>[];
};

View File

@@ -1,2 +1,4 @@
export * from './adapter.js';
export * from './delta-converter.js';
export * from './hast.js';
export * from './preprocessor.js';

View File

@@ -0,0 +1,88 @@
import type { ServiceIdentifier, ServiceProvider } from '@blocksuite/global/di';
/**
* Level of preprocessing
* - doc: applied when converting to a doc snapshot
* - slice: applied when converting to a slice snapshot
* - block: applied when converting to a block snapshot
*/
export type PreprocessLevel = 'doc' | 'slice' | 'block';
/**
* Interface for adapter preprocessor
* @template T Type of content to process, defaults to string
*/
export type AdapterPreprocessor<T = string> = {
/**
* Unique name of the preprocessor
*/
name: string;
/**
* Levels this preprocessor supports.
* PreprocessorManager registers the preprocessor under each listed level
* and runs it only when processing one of them.
*/
levels: PreprocessLevel[];
/**
* Process the content. The call is synchronous: the processed content is
* returned directly.
* @param content Content to process
* @returns Processed content
*/
preprocess: (content: T) => T;
};
/**
 * Base manager that collects preprocessors from a service provider and
 * applies them to content, grouped by preprocessing level.
 *
 * @template T Type of content to process
 * @template P Type of preprocessor
 */
export abstract class PreprocessorManager<T, P extends AdapterPreprocessor<T>> {
  /** Registered preprocessors, one insertion-ordered set per level. */
  protected readonly preprocessors: Map<PreprocessLevel, Set<P>>;

  /**
   * @param provider Service provider the preprocessors are looked up from
   * @param identifier Identifier used to look up all preprocessors
   */
  constructor(
    protected readonly provider: ServiceProvider,
    protected readonly identifier: ServiceIdentifier<P>
  ) {
    // Every known level starts out with an empty set.
    this.preprocessors = new Map<PreprocessLevel, Set<P>>([
      ['doc', new Set<P>()],
      ['slice', new Set<P>()],
      ['block', new Set<P>()],
    ]);

    // Pull in everything the provider knows under the identifier.
    this.initializePreprocessors();
  }

  /**
   * Registers each preprocessor found in the provider under every level it
   * declares. Levels without a set in the map are ignored.
   */
  private initializePreprocessors(): void {
    const registered = Array.from(
      this.provider.getAll(this.identifier).values()
    );

    registered.forEach(preprocessor => {
      for (const level of preprocessor.levels) {
        this.preprocessors.get(level)?.add(preprocessor);
      }
    });
  }

  /**
   * Runs the content through all preprocessors of the given level, in
   * registration order, feeding each one the previous one's output.
   *
   * @param level Level to process at
   * @param content Content to process
   * @returns Processed content; the input itself when no preprocessor applies
   */
  process(level: PreprocessLevel, content: T): T {
    const pipeline = this.preprocessors.get(level);
    if (!pipeline) {
      return content;
    }

    let current = content;
    // Iterate over a snapshot of the set, in insertion order.
    for (const preprocessor of Array.from(pipeline)) {
      current = preprocessor.preprocess(current);
    }
    return current;
  }
}