mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-19 15:26:59 +08:00
feat(editor): support markdown adapter preprocessed with latex delimiters (#11431)
To close [BS-2870](https://linear.app/affine-design/issue/BS-2870/支持识别-和-[-包裹内容为公式)

## Add Markdown Preprocessor Extension and Enhanced LaTeX Support

### Markdown Preprocessor Extension

This PR introduces a new preprocessor extension for Markdown adapters that allows preprocessing of content before conversion:

- Adds a new `PreprocessorManager` for handling text transformations
- Introduces an extensible preprocessor interface that supports different processing levels (block/slice/doc)
- Integrates the preprocessor extension into the existing Markdown adapter workflow

### LaTeX Support Enhancement

Extends LaTeX support to handle both traditional and alternative syntax:

- Adds support for backslash LaTeX syntax:
  - Block math: `\[...\]` alongside the existing `$$...$$`
  - Inline math: `\(...\)` alongside the existing `$...$`
- Implements a LaTeX preprocessor to standardize syntax before conversion
- Updates tests to cover both syntax variants
This commit is contained in:
@@ -11,7 +11,7 @@ import {
|
||||
DeltaASTConverter,
|
||||
type DeltaASTConverterOptions,
|
||||
type InlineDeltaMatcher,
|
||||
} from '../types/adapter.js';
|
||||
} from '../types/delta-converter.js';
|
||||
import type { HtmlAST, InlineHtmlAST } from '../types/hast.js';
|
||||
import { AdapterTextUtils } from '../utils/text.js';
|
||||
|
||||
|
||||
@@ -30,11 +30,14 @@ export {
|
||||
MarkdownAdapter,
|
||||
MarkdownAdapterFactoryExtension,
|
||||
MarkdownAdapterFactoryIdentifier,
|
||||
type MarkdownAdapterPreprocessor,
|
||||
type MarkdownAST,
|
||||
MarkdownASTToDeltaExtension,
|
||||
type MarkdownASTToDeltaMatcher,
|
||||
MarkdownASTToDeltaMatcherIdentifier,
|
||||
MarkdownDeltaConverter,
|
||||
MarkdownPreprocessorExtension,
|
||||
MarkdownPreprocessorManager,
|
||||
} from './markdown';
|
||||
export * from './middlewares';
|
||||
export * from './mix-text';
|
||||
|
||||
@@ -10,7 +10,7 @@ import {
|
||||
type ASTToDeltaMatcher,
|
||||
DeltaASTConverter,
|
||||
type InlineDeltaMatcher,
|
||||
} from '../types/adapter.js';
|
||||
} from '../types/delta-converter.js';
|
||||
import type { MarkdownAST } from './type.js';
|
||||
|
||||
export type InlineDeltaToMarkdownAdapterMatcher =
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export * from './block-adapter.js';
|
||||
export * from './delta-converter.js';
|
||||
export * from './markdown.js';
|
||||
export * from './preprocessor.js';
|
||||
export * from './type.js';
|
||||
|
||||
@@ -37,6 +37,7 @@ import {
|
||||
MarkdownDeltaConverter,
|
||||
} from './delta-converter';
|
||||
import { remarkGfm } from './gfm';
|
||||
import { MarkdownPreprocessorManager } from './preprocessor';
|
||||
import type { Markdown, MarkdownAST } from './type';
|
||||
|
||||
type MarkdownToSliceSnapshotPayload = {
|
||||
@@ -167,6 +168,7 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
|
||||
};
|
||||
|
||||
deltaConverter: MarkdownDeltaConverter;
|
||||
preprocessorManager: MarkdownPreprocessorManager;
|
||||
|
||||
readonly blockMatchers: BlockMarkdownAdapterMatcher[];
|
||||
|
||||
@@ -187,6 +189,7 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
|
||||
inlineDeltaToMarkdownAdapterMatchers,
|
||||
markdownInlineToDeltaMatchers
|
||||
);
|
||||
this.preprocessorManager = new MarkdownPreprocessorManager(provider);
|
||||
}
|
||||
|
||||
private _astToMarkdown(ast: Root) {
|
||||
@@ -273,7 +276,11 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
|
||||
async toBlockSnapshot(
|
||||
payload: ToBlockSnapshotPayload<Markdown>
|
||||
): Promise<BlockSnapshot> {
|
||||
const markdownAst = this._markdownToAst(payload.file);
|
||||
const markdownFile = this.preprocessorManager.process(
|
||||
'block',
|
||||
payload.file
|
||||
);
|
||||
const markdownAst = this._markdownToAst(markdownFile);
|
||||
const blockSnapshotRoot = {
|
||||
type: 'block',
|
||||
id: nanoid(),
|
||||
@@ -297,7 +304,8 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
|
||||
async toDocSnapshot(
|
||||
payload: ToDocSnapshotPayload<Markdown>
|
||||
): Promise<DocSnapshot> {
|
||||
const markdownAst = this._markdownToAst(payload.file);
|
||||
const markdownFile = this.preprocessorManager.process('doc', payload.file);
|
||||
const markdownAst = this._markdownToAst(markdownFile);
|
||||
const blockSnapshotRoot = {
|
||||
type: 'block',
|
||||
id: nanoid(),
|
||||
@@ -356,67 +364,11 @@ export class MarkdownAdapter extends BaseAdapter<Markdown> {
|
||||
async toSliceSnapshot(
|
||||
payload: MarkdownToSliceSnapshotPayload
|
||||
): Promise<SliceSnapshot | null> {
|
||||
let codeFence = '';
|
||||
payload.file = payload.file
|
||||
.split('\n')
|
||||
.map(line => {
|
||||
if (line.trimStart().startsWith('-')) {
|
||||
return line;
|
||||
}
|
||||
let trimmedLine = line.trimStart();
|
||||
if (!codeFence && trimmedLine.startsWith('```')) {
|
||||
codeFence = trimmedLine.substring(
|
||||
0,
|
||||
trimmedLine.lastIndexOf('```') + 3
|
||||
);
|
||||
if (codeFence.split('').every(c => c === '`')) {
|
||||
return line;
|
||||
}
|
||||
codeFence = '';
|
||||
}
|
||||
if (!codeFence && trimmedLine.startsWith('~~~')) {
|
||||
codeFence = trimmedLine.substring(
|
||||
0,
|
||||
trimmedLine.lastIndexOf('~~~') + 3
|
||||
);
|
||||
if (codeFence.split('').every(c => c === '~')) {
|
||||
return line;
|
||||
}
|
||||
codeFence = '';
|
||||
}
|
||||
if (
|
||||
!!codeFence &&
|
||||
trimmedLine.startsWith(codeFence) &&
|
||||
trimmedLine.lastIndexOf(codeFence) === 0
|
||||
) {
|
||||
codeFence = '';
|
||||
}
|
||||
if (codeFence) {
|
||||
return line;
|
||||
}
|
||||
|
||||
trimmedLine = trimmedLine.trimEnd();
|
||||
if (!trimmedLine.startsWith('<') && !trimmedLine.endsWith('>')) {
|
||||
// check if it is a url link and wrap it with the angle brackets
|
||||
// sometimes the url includes emphasis `_` that will break URL parsing
|
||||
//
|
||||
// eg. /MuawcBMT1Mzvoar09-_66?mode=page&blockIds=rL2_GXbtLU2SsJVfCSmh_
|
||||
// https://www.markdownguide.org/basic-syntax/#urls-and-email-addresses
|
||||
try {
|
||||
const valid =
|
||||
URL.canParse?.(trimmedLine) ?? Boolean(new URL(trimmedLine));
|
||||
if (valid) {
|
||||
return `<${trimmedLine}>`;
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
}
|
||||
}
|
||||
|
||||
return line.replace(/^ /, ' ');
|
||||
})
|
||||
.join('\n');
|
||||
const markdownAst = this._markdownToAst(payload.file);
|
||||
const markdownFile = this.preprocessorManager.process(
|
||||
'slice',
|
||||
payload.file
|
||||
);
|
||||
const markdownAst = this._markdownToAst(markdownFile);
|
||||
const blockSnapshotRoot = {
|
||||
type: 'block',
|
||||
id: nanoid(),
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
import {
|
||||
createIdentifier,
|
||||
type ServiceIdentifier,
|
||||
type ServiceProvider,
|
||||
} from '@blocksuite/global/di';
|
||||
import type { ExtensionType } from '@blocksuite/store';
|
||||
|
||||
import {
|
||||
type AdapterPreprocessor,
|
||||
PreprocessorManager,
|
||||
} from '../types/preprocessor';
|
||||
import type { Markdown } from './type';
|
||||
|
||||
/** An `AdapterPreprocessor` whose content type is `Markdown`. */
export type MarkdownAdapterPreprocessor = AdapterPreprocessor<Markdown>;
|
||||
|
||||
/**
 * Service identifier under which Markdown preprocessors are registered.
 * It is module-private: registration goes through
 * `MarkdownPreprocessorExtension` and lookup through
 * `MarkdownPreprocessorManager`.
 */
const MarkdownPreprocessorIdentifier =
|
||||
createIdentifier<MarkdownAdapterPreprocessor>('MarkdownPreprocessor');
|
||||
|
||||
/**
 * Wraps a Markdown preprocessor in an extension so that it can be registered
 * through dependency injection.
 *
 * The identifier is obtained by calling `MarkdownPreprocessorIdentifier` with
 * `preprocessor.name`; `setup` binds that identifier to a factory that returns
 * the given preprocessor object.
 *
 * NOTE(review): two preprocessors with the same `name` would resolve to the
 * same identifier call — confirm how the DI container treats that case.
 *
 * @param preprocessor The preprocessor to register
 * @returns An extension whose `setup` performs the registration, together
 * with the `identifier` it registers under
 */
export const MarkdownPreprocessorExtension = (
|
||||
preprocessor: MarkdownAdapterPreprocessor
|
||||
): ExtensionType & {
|
||||
identifier: ServiceIdentifier<MarkdownAdapterPreprocessor>;
|
||||
} => {
|
||||
// Scope the shared identifier by the preprocessor's name.
const identifier = MarkdownPreprocessorIdentifier(preprocessor.name);
|
||||
return {
|
||||
setup: di => {
|
||||
// The factory always hands back the same preprocessor object.
di.addImpl(identifier, () => preprocessor);
|
||||
},
|
||||
identifier,
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * `PreprocessorManager` specialised for `Markdown` content.
 *
 * It only supplies the base class with the provider and the module-private
 * `MarkdownPreprocessorIdentifier`; all registration and processing logic
 * lives in `PreprocessorManager`.
 */
export class MarkdownPreprocessorManager extends PreprocessorManager<
|
||||
Markdown,
|
||||
MarkdownAdapterPreprocessor
|
||||
> {
|
||||
/**
 * @param provider Service provider passed on to the base class
 */
constructor(provider: ServiceProvider) {
|
||||
super(provider, MarkdownPreprocessorIdentifier);
|
||||
}
|
||||
}
|
||||
@@ -11,7 +11,7 @@ import {
|
||||
DeltaASTConverter,
|
||||
type DeltaASTConverterOptions,
|
||||
type InlineDeltaMatcher,
|
||||
} from '../types/adapter.js';
|
||||
} from '../types/delta-converter.js';
|
||||
import type { HtmlAST, InlineHtmlAST } from '../types/hast.js';
|
||||
|
||||
export type InlineDeltaToNotionHtmlAdapterMatcher =
|
||||
|
||||
@@ -5,12 +5,12 @@ import {
|
||||
import type { DeltaInsert, ExtensionType } from '@blocksuite/store';
|
||||
|
||||
import type { AffineTextAttributes } from '../../types/index.js';
|
||||
import type { TextBuffer } from '../types/adapter.js';
|
||||
import {
|
||||
type ASTToDeltaMatcher,
|
||||
DeltaASTConverter,
|
||||
type InlineDeltaMatcher,
|
||||
type TextBuffer,
|
||||
} from '../types/adapter.js';
|
||||
} from '../types/delta-converter.js';
|
||||
|
||||
export type InlineDeltaToPlainTextAdapterMatcher =
|
||||
InlineDeltaMatcher<TextBuffer>;
|
||||
|
||||
@@ -4,15 +4,13 @@ import {
|
||||
type ASTWalker,
|
||||
type ASTWalkerContext,
|
||||
type BaseAdapter,
|
||||
type BaseTextAttributes,
|
||||
type BlockSnapshot,
|
||||
BlockSnapshotSchema,
|
||||
type DeltaInsert,
|
||||
type NodeProps,
|
||||
type Transformer,
|
||||
} from '@blocksuite/store';
|
||||
|
||||
import type { AffineTextAttributes } from '../../types/index.js';
|
||||
import type { DeltaASTConverter } from './delta-converter.js';
|
||||
|
||||
/**
 * Type guard for `BlockSnapshot`.
 *
 * @param node Value to test
 * @returns `true` when `BlockSnapshotSchema.safeParse(node)` reports success
 */
export const isBlockSnapshotNode = (node: unknown): node is BlockSnapshot =>
|
||||
BlockSnapshotSchema.safeParse(node).success;
|
||||
@@ -21,13 +19,6 @@ export type TextBuffer = {
|
||||
content: string;
|
||||
};
|
||||
|
||||
export type DeltaASTConverterOptions = {
|
||||
trim?: boolean;
|
||||
pre?: boolean;
|
||||
pageMap?: Map<string, string>;
|
||||
removeLastBr?: boolean;
|
||||
};
|
||||
|
||||
export type AdapterContext<
|
||||
ONode extends object,
|
||||
TNode extends object = never,
|
||||
@@ -124,56 +115,6 @@ export type BlockAdapterMatcher<
|
||||
};
|
||||
};
|
||||
|
||||
export abstract class DeltaASTConverter<
|
||||
TextAttributes extends BaseTextAttributes = BaseTextAttributes,
|
||||
AST = unknown,
|
||||
> {
|
||||
/**
|
||||
* Convert AST format to delta format
|
||||
*/
|
||||
abstract astToDelta(
|
||||
ast: AST,
|
||||
options?: unknown
|
||||
): DeltaInsert<TextAttributes>[];
|
||||
|
||||
/**
|
||||
* Convert delta format to AST format
|
||||
*/
|
||||
abstract deltaToAST(
|
||||
deltas: DeltaInsert<TextAttributes>[],
|
||||
options?: unknown
|
||||
): AST[];
|
||||
}
|
||||
|
||||
export type InlineDeltaMatcher<TNode extends object = never> = {
|
||||
name: keyof AffineTextAttributes | string;
|
||||
match: (delta: DeltaInsert<AffineTextAttributes>) => boolean;
|
||||
toAST: (
|
||||
delta: DeltaInsert<AffineTextAttributes>,
|
||||
context: {
|
||||
configs: Map<string, string>;
|
||||
current: TNode;
|
||||
},
|
||||
provider?: ServiceProvider
|
||||
) => TNode;
|
||||
};
|
||||
|
||||
export type ASTToDeltaMatcher<AST> = {
|
||||
name: string;
|
||||
match: (ast: AST) => boolean;
|
||||
toDelta: (
|
||||
ast: AST,
|
||||
context: {
|
||||
configs: Map<string, string>;
|
||||
options: DeltaASTConverterOptions;
|
||||
toDelta: (
|
||||
ast: AST,
|
||||
options?: DeltaASTConverterOptions
|
||||
) => DeltaInsert<AffineTextAttributes>[];
|
||||
}
|
||||
) => DeltaInsert<AffineTextAttributes>[];
|
||||
};
|
||||
|
||||
export type AdapterFactory = {
|
||||
// TODO(@chen): Make it return the specific adapter type
|
||||
get: (job: Transformer) => BaseAdapter;
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import type { ServiceProvider } from '@blocksuite/global/di';
|
||||
import type { BaseTextAttributes, DeltaInsert } from '@blocksuite/store';
|
||||
|
||||
import type { AffineTextAttributes } from '../../types';
|
||||
|
||||
/**
 * Options handed to AST → delta conversion; `ASTToDeltaMatcher` receives them
 * as `context.options` and may pass them to `context.toDelta`.
 *
 * Every field is optional.
 * NOTE(review): the meaning of the individual flags is not visible in this
 * module — document each field once confirmed against the converters.
 */
export type DeltaASTConverterOptions = {
|
||||
trim?: boolean;
|
||||
pre?: boolean;
|
||||
pageMap?: Map<string, string>;
|
||||
removeLastBr?: boolean;
|
||||
};
|
||||
|
||||
/**
 * Abstract base for converters that translate between an AST representation
 * and delta inserts, in both directions.
 *
 * @template TextAttributes Attribute type carried by each `DeltaInsert`;
 * defaults to `BaseTextAttributes`
 * @template AST Node type of the AST side; defaults to `unknown`
 */
export abstract class DeltaASTConverter<
|
||||
TextAttributes extends BaseTextAttributes = BaseTextAttributes,
|
||||
AST = unknown,
|
||||
> {
|
||||
/**
|
||||
* Convert AST format to delta format
 * @param ast AST node to convert
 * @param options Converter-specific options; typed `unknown` here, so each
 * subclass decides what it accepts
 * @returns The delta inserts produced from `ast`
|
||||
*/
|
||||
abstract astToDelta(
|
||||
ast: AST,
|
||||
options?: unknown
|
||||
): DeltaInsert<TextAttributes>[];
|
||||
|
||||
/**
|
||||
* Convert delta format to AST format
 * @param deltas Delta inserts to convert
 * @param options Converter-specific options; typed `unknown` here, so each
 * subclass decides what it accepts
 * @returns The AST nodes produced from `deltas`
|
||||
*/
|
||||
abstract deltaToAST(
|
||||
deltas: DeltaInsert<TextAttributes>[],
|
||||
options?: unknown
|
||||
): AST[];
|
||||
}
|
||||
|
||||
/**
 * Describes one rule for turning an inline delta into an AST node.
 *
 * @template TNode AST node type that `toAST` receives and returns
 */
export type InlineDeltaMatcher<TNode extends object = never> = {
|
||||
/** Name of the matcher: an `AffineTextAttributes` key or any other string. */
name: keyof AffineTextAttributes | string;
|
||||
/** Returns `true` when this matcher applies to `delta`. */
match: (delta: DeltaInsert<AffineTextAttributes>) => boolean;
|
||||
/**
 * Produces the AST node for `delta`.
 * `context.current` has the same node type as the return value —
 * presumably the node built so far for this delta; confirm against callers.
 * `provider` is optional, so implementations must cope with it being absent.
 */
toAST: (
|
||||
delta: DeltaInsert<AffineTextAttributes>,
|
||||
context: {
|
||||
configs: Map<string, string>;
|
||||
current: TNode;
|
||||
},
|
||||
provider?: ServiceProvider
|
||||
) => TNode;
|
||||
};
|
||||
|
||||
/**
 * Describes one rule for turning an AST node into delta inserts.
 *
 * @template AST Node type this matcher inspects and converts
 */
export type ASTToDeltaMatcher<AST> = {
|
||||
/** Name of the matcher. */
name: string;
|
||||
/** Returns `true` when this matcher applies to `ast`. */
match: (ast: AST) => boolean;
|
||||
/**
 * Converts `ast` into delta inserts.
 * `context.options` carries the active `DeltaASTConverterOptions`.
 * `context.toDelta` is a callback with the same AST → delta shape —
 * presumably used to convert nested nodes; confirm against the converter
 * that builds this context.
 */
toDelta: (
|
||||
ast: AST,
|
||||
context: {
|
||||
configs: Map<string, string>;
|
||||
options: DeltaASTConverterOptions;
|
||||
toDelta: (
|
||||
ast: AST,
|
||||
options?: DeltaASTConverterOptions
|
||||
) => DeltaInsert<AffineTextAttributes>[];
|
||||
}
|
||||
) => DeltaInsert<AffineTextAttributes>[];
|
||||
};
|
||||
@@ -1,2 +1,4 @@
|
||||
export * from './adapter.js';
|
||||
export * from './delta-converter.js';
|
||||
export * from './hast.js';
|
||||
export * from './preprocessor.js';
|
||||
|
||||
88
blocksuite/affine/shared/src/adapters/types/preprocessor.ts
Normal file
88
blocksuite/affine/shared/src/adapters/types/preprocessor.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import type { ServiceIdentifier, ServiceProvider } from '@blocksuite/global/di';
|
||||
|
||||
/**
|
||||
* Level of preprocessing, i.e. which kind of conversion the content is about to go through
|
||||
* - doc: Applied when converting to a doc snapshot
|
||||
* - slice: Applied when converting to a slice snapshot
|
||||
* - block: Applied when converting to a block snapshot
|
||||
*/
|
||||
export type PreprocessLevel = 'doc' | 'slice' | 'block';
|
||||
|
||||
/**
|
||||
* Interface for adapter preprocessor: a named, synchronous content-to-content transform
|
||||
* @template T Type of content to process, defaults to string
|
||||
*/
|
||||
export type AdapterPreprocessor<T = string> = {
|
||||
/**
|
||||
* Unique name of the preprocessor
 * (`MarkdownPreprocessorExtension` passes it to the service identifier)
|
||||
*/
|
||||
name: string;
|
||||
|
||||
/**
|
||||
* Levels this preprocessor supports
 * (`PreprocessorManager` adds the preprocessor to the set of each listed level)
|
||||
*/
|
||||
levels: PreprocessLevel[];
|
||||
|
||||
/**
|
||||
* Process the content
 * (`PreprocessorManager.process` chains preprocessors, so the input may
 * already be the output of another preprocessor)
|
||||
* @param content Content to process
|
||||
* @returns Processed content
|
||||
*/
|
||||
preprocess: (content: T) => T;
|
||||
};
|
||||
|
||||
/**
|
||||
* Manager class for handling preprocessors: groups them by level and runs them in sequence
 *
 * Preprocessors are read from the provider once, in the constructor; the
 * class has no method for adding preprocessors afterwards.
|
||||
* @template T Type of content to process
|
||||
* @template P Type of preprocessor
|
||||
*/
|
||||
export abstract class PreprocessorManager<T, P extends AdapterPreprocessor<T>> {
|
||||
/** Preprocessors grouped by the level(s) they declared. */
protected readonly preprocessors: Map<PreprocessLevel, Set<P>>;
|
||||
|
||||
/**
 * @param provider Service provider the preprocessors are read from
 * @param identifier Identifier used to look the preprocessors up
 */
constructor(
|
||||
protected readonly provider: ServiceProvider,
|
||||
protected readonly identifier: ServiceIdentifier<P>
|
||||
) {
|
||||
this.preprocessors = new Map();
|
||||
|
||||
// Initialize Sets for each level
|
||||
this.preprocessors.set('doc', new Set());
|
||||
this.preprocessors.set('slice', new Set());
|
||||
this.preprocessors.set('block', new Set());
|
||||
|
||||
// Register all preprocessors from provider
|
||||
this.initializePreprocessors();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize preprocessors from provider
 *
 * Collects every value returned by `provider.getAll(identifier)` and adds
 * it to the set of each level listed in its `levels`.
|
||||
*/
|
||||
private initializePreprocessors(): void {
|
||||
const preprocessors = Array.from(
|
||||
this.provider.getAll(this.identifier).values()
|
||||
);
|
||||
|
||||
for (const preprocessor of preprocessors) {
|
||||
for (const level of preprocessor.levels) {
|
||||
const levelSet = this.preprocessors.get(level);
|
||||
// A level without a set created in the constructor is skipped silently.
if (levelSet) {
|
||||
levelSet.add(preprocessor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Preprocess content at the specified level
 *
 * The preprocessors registered for `level` run one after another in the
 * set's iteration order (insertion order), each receiving the previous
 * one's output. With none registered, `content` is returned unchanged.
|
||||
* @param level Level to process at
|
||||
* @param content Content to process
|
||||
* @returns Processed content
|
||||
*/
|
||||
process(level: PreprocessLevel, content: T): T {
|
||||
const processors = this.preprocessors.get(level) ?? new Set();
|
||||
// Fold the content through every preprocessor, starting from `content`.
return Array.from(processors).reduce(
|
||||
(result, preprocessor) => preprocessor.preprocess(result),
|
||||
content
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user