fix(editor): handle html content copied from google docs (#12383)

Closes: [BS-3508](https://linear.app/affine-design/issue/BS-3508/google-docs复制内容到affine时自动加粗问题)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Improved detection of bold, italic, underline, and strike-through formatting in imported HTML, supporting both tags and inline CSS styles.
  - Enhanced handling of inline elements containing block-level children to ensure correct formatting and structure during HTML import.
  - Introduced a plugin that converts inline elements with block-level children into block elements, preserving original tag information.
- **Bug Fixes**
  - Resolved issues where block-level elements nested inside inline tags could cause incorrect formatting or structure.
- **Tests**
  - Added comprehensive test coverage for HTML formatting conversions and plugin behavior to ensure accuracy and reliability.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
donteatfriedrice
2025-05-21 02:45:17 +00:00
parent ae0dbb9faf
commit 1a070367f3
6 changed files with 568 additions and 6 deletions

View File

@@ -0,0 +1,61 @@
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
import { unified } from 'unified';
import { describe, expect, it } from 'vitest';
import { rehypeInlineToBlock } from '../../../../adapters/html/rehype-plugins/inline-to-block';
describe('rehypeInlineToBlock', () => {
  /**
   * Runs an HTML fragment through parse -> rehypeInlineToBlock -> stringify
   * and returns the serialized result.
   */
  const transform = (html: string) =>
    unified()
      .use(rehypeParse, { fragment: true })
      .use(rehypeInlineToBlock)
      .use(rehypeStringify)
      .processSync(html)
      .toString();

  // A <b> with only text inside must be left untouched.
  it('should not transform inline elements without block children', () => {
    expect(transform('<b>Hello World</b>')).toBe('<b>Hello World</b>');
  });

  // A <b> wrapping a block child becomes a <div> tagged with its original name.
  it('should transform inline elements containing block children', () => {
    expect(transform('<b><p>Hello World</p></b>')).toBe(
      '<div data-original-tag="b"><p>Hello World</p></div>'
    );
  });

  // Attributes already on the <b> survive the conversion.
  it('should preserve existing attributes when transforming', () => {
    expect(
      transform('<b class="test" id="demo"><p>Hello World</p></b>')
    ).toBe(
      '<div class="test" id="demo" data-original-tag="b"><p>Hello World</p></div>'
    );
  });

  // Several block children of different kinds are all kept in order.
  it('should handle multiple block elements within inline element', () => {
    expect(
      transform('<b><p>First</p><div>Second</div><h1>Third</h1></b>')
    ).toBe(
      '<div data-original-tag="b"><p>First</p><div>Second</div><h1>Third</h1></div>'
    );
  });

  // Text siblings around a block child are preserved as-is.
  it('should handle mixed content (text and block elements)', () => {
    expect(
      transform('<b>Text before<p>Block element</p>Text after</b>')
    ).toBe(
      '<div data-original-tag="b">Text before<p>Block element</p>Text after</div>'
    );
  });

  // Only the outer <b> (direct block child) is converted; the inner <b>
  // that wraps plain text stays inline.
  it('should handle complex nested structures', () => {
    expect(
      transform('<b><div><p>Nested <b>inline</b> content</p></div></b>')
    ).toBe(
      '<div data-original-tag="b"><div><p>Nested <b>inline</b> content</p></div></div>'
    );
  });
});

View File

@@ -40,6 +40,7 @@ import {
HtmlDeltaConverter,
InlineDeltaToHtmlAdapterMatcherIdentifier,
} from './delta-converter';
import { rehypeInlineToBlock } from './rehype-plugins';
export type Html = string;
@@ -195,7 +196,9 @@ export class HtmlAdapter extends BaseAdapter<Html> {
}
/**
 * Parses an HTML string into a syntax tree.
 *
 * NOTE(review): this span comes from a rendered diff hunk, so it appears to
 * show both the pre-change statement (the direct
 * `unified().use(rehypeParse).parse(html)` return) and the post-change
 * statements that follow it. Read literally, the first `return` makes the
 * processor-based lines unreachable; presumably only the latter exist in the
 * actual file after the commit — confirm against the repository.
 */
private _htmlToAst(html: Html) {
return unified().use(rehypeParse).parse(html);
// Build a processor that parses HTML and also registers rehypeInlineToBlock.
const processor = unified().use(rehypeParse).use(rehypeInlineToBlock);
// parse() only builds the tree; the registered transformer is applied by
// the explicit runSync() call below.
const ast = processor.parse(html);
return processor.runSync(ast);
}
override async fromBlockSnapshot(

View File

@@ -0,0 +1 @@
export * from './inline-to-block';

View File

@@ -0,0 +1,50 @@
import type { Root } from 'hast';
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
/**
 * Content copied from Google Docs arrives wrapped in a <b> tag.
 * To handle that case, such a <b> tag is converted into a <div> tag.
 */

// Inline tags that are candidates for conversion.
const inlineElements = new Set<string>(['b']);

// Block-level tags; a candidate is converted when one of these is its direct child.
const blockElements = new Set<string>([
  // generic containers and paragraphs
  'div', 'p',
  // headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // lists
  'ul', 'ol', 'li',
  // quotes and preformatted text
  'blockquote', 'pre',
]);
/**
 * Rehype plugin that rewrites an inline element (see `inlineElements`) into a
 * `<div>` when at least one of its direct children is a block-level element
 * (see `blockElements`). The original tag name is recorded in the
 * `data-original-tag` property; all existing properties are kept.
 *
 * The tree is mutated in place.
 */
export const rehypeInlineToBlock: Plugin<[], Root> = () => tree => {
  visit(tree, 'element', element => {
    const { tagName } = element;

    // Only tags listed as inline candidates are considered.
    if (!inlineElements.has(tagName)) {
      return;
    }

    // Look at direct children only; deeper descendants are not inspected here.
    const wrapsBlock = element.children.some(
      child => child.type === 'element' && blockElements.has(child.tagName)
    );
    if (!wrapsBlock) {
      return;
    }

    // Promote the element to a div and remember what it used to be.
    element.tagName = 'div';
    element.properties = {
      ...element.properties,
      'data-original-tag': tagName,
    };
  });
};