Files
AFFiNE-Mirror/blocksuite/affine/inlines/preset/src/adapters/html/html-inline.ts
donteatfriedrice d56d46d8d6 fix(editor): should preserve format in <p> when importing html (#12275)
Closes: [BS-3485](https://linear.app/affine-design/issue/BS-3485/粘贴-html-格式的内容时,紧邻着-bold-text-的普通文本会丢失空格)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Bug Fixes**
  - Improved handling of spaces and whitespace in paragraphs when converting HTML with inline formatting, ensuring spaces are preserved as in the original content.

- **Tests**
  - Added a new test to verify that spaces are correctly preserved in paragraphs containing bold and italic formatting during HTML conversion.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-05-15 00:50:10 +00:00

196 lines
5.3 KiB
TypeScript

import {
type HtmlAST,
HtmlASTToDeltaExtension,
} from '@blocksuite/affine-shared/adapters';
import { collapseWhiteSpace } from 'collapse-white-space';
import type { Element } from 'hast';
/**
* Handle empty text nodes created by HTML parser for styling purposes.
* These nodes typically contain only whitespace/newlines, for example:
* ```json
* {
* "type": "text",
* "value": "\n\n \n \n "
* }
* ```
* We collapse and trim the whitespace to check if the node is truly empty,
* and return an empty array in that case.
*/
const isEmptyText = (ast: HtmlAST): boolean => {
return (
ast.type === 'text' && collapseWhiteSpace(ast.value, { trim: true }) === ''
);
};
const isElement = (ast: HtmlAST): ast is Element => {
return ast.type === 'element';
};
const textLikeElementTags = new Set(['span', 'bdi', 'bdo', 'ins']);
const listElementTags = new Set(['ol', 'ul']);
const strongElementTags = new Set(['strong', 'b']);
const italicElementTags = new Set(['i', 'em']);
export const htmlTextToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'text',
match: ast => ast.type === 'text',
toDelta: (ast, context) => {
if (!('value' in ast)) {
return [];
}
const { options } = context;
options.trim ??= false;
if (options.pre) {
return [{ insert: ast.value }];
}
if (isEmptyText(ast)) {
return [];
}
const value = options.trim
? collapseWhiteSpace(ast.value, { trim: options.trim })
: collapseWhiteSpace(ast.value);
return value ? [{ insert: value }] : [];
},
});
export const htmlTextLikeElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'text-like-element',
match: ast => isElement(ast) && textLikeElementTags.has(ast.tagName),
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false })
);
},
});
export const htmlListToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'list-element',
match: ast => isElement(ast) && listElementTags.has(ast.tagName),
toDelta: () => {
return [];
},
});
export const htmlStrongElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'strong-element',
match: ast => isElement(ast) && strongElementTags.has(ast.tagName),
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes, bold: true };
return delta;
})
);
},
});
export const htmlItalicElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'italic-element',
match: ast => isElement(ast) && italicElementTags.has(ast.tagName),
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes, italic: true };
return delta;
})
);
},
});
export const htmlCodeElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'code-element',
match: ast => isElement(ast) && ast.tagName === 'code',
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes, code: true };
return delta;
})
);
},
});
export const htmlDelElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'del-element',
match: ast => isElement(ast) && ast.tagName === 'del',
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes, strike: true };
return delta;
})
);
},
});
export const htmlUnderlineElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'underline-element',
match: ast => isElement(ast) && ast.tagName === 'u',
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes, underline: true };
return delta;
})
);
},
});
export const htmlMarkElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'mark-element',
match: ast => isElement(ast) && ast.tagName === 'mark',
toDelta: (ast, context) => {
if (!isElement(ast)) {
return [];
}
return ast.children.flatMap(child =>
context.toDelta(child, { trim: false }).map(delta => {
delta.attributes = { ...delta.attributes };
return delta;
})
);
},
});
export const htmlBrElementToDeltaMatcher = HtmlASTToDeltaExtension({
name: 'br-element',
match: ast => isElement(ast) && ast.tagName === 'br',
toDelta: () => {
return [{ insert: '\n' }];
},
});
export const HtmlInlineToDeltaAdapterExtensions = [
htmlTextToDeltaMatcher,
htmlTextLikeElementToDeltaMatcher,
htmlStrongElementToDeltaMatcher,
htmlItalicElementToDeltaMatcher,
htmlCodeElementToDeltaMatcher,
htmlDelElementToDeltaMatcher,
htmlUnderlineElementToDeltaMatcher,
htmlMarkElementToDeltaMatcher,
htmlBrElementToDeltaMatcher,
];