fix(editor): wrap inline elements in a p tag to avoid them being treated as paragraph blocks when importing html (#12389)

This commit is contained in:
donteatfriedrice
2025-05-21 02:45:17 +00:00
parent 1a070367f3
commit bfbbc2342e
6 changed files with 235 additions and 78 deletions

View File

@@ -0,0 +1,55 @@
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
import { unified } from 'unified';
import { describe, expect, it } from 'vitest';
import { rehypeWrapInlineElements } from '../../../../adapters/html/rehype-plugins/wrap-inline-element';
describe('rehypeWrapInlineElements', () => {
  /**
   * Parses `html` as a fragment, applies the plugin under test and returns
   * the re-serialized markup.
   */
  function transform(html: string): string {
    const file = unified()
      .use(rehypeParse, { fragment: true })
      .use(rehypeWrapInlineElements)
      .use(rehypeStringify)
      .processSync(html);
    return file.toString();
  }

  // A div holding only inline children is left untouched.
  it('should not wrap inline elements without block children in a div tag', () => {
    expect(transform('<div><span>Hello World</span></div>')).toBe(
      '<div><span>Hello World</span></div>'
    );
  });

  // A div holding only block children is left untouched.
  it('should not wrap elements without inline children in a div tag', () => {
    expect(transform('<div><h1>Hello World</h1></div>')).toBe(
      '<div><h1>Hello World</h1></div>'
    );
  });

  // An inline element next to a block sibling gets its own paragraph.
  it('should wrap inline elements containing block children in a p tag', () => {
    const html = '<div><p>Hello World</p><span>Hello World</span></div>';
    const expected =
      '<div><p>Hello World</p><p><span>Hello World</span></p></div>';
    expect(transform(html)).toBe(expected);
  });

  // Adjacent inline elements share a single paragraph.
  it('should wrap inline elements sequentially', () => {
    const html =
      '<div><p>Hello World</p><span>Hello</span><span>World</span></div>';
    const expected =
      '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p></div>';
    expect(transform(html)).toBe(expected);
  });

  // A block element between inline runs splits them into separate paragraphs.
  it('should wrap inline elements sequentially mixed with block elements', () => {
    const html =
      '<div><p>Hello World</p><span>Hello</span><span>World</span><h1>Title</h1><span>Hello</span><span>World</span></div>';
    const expected =
      '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p><h1>Title</h1><p><span>Hello</span><span>World</span></p></div>';
    expect(transform(html)).toBe(expected);
  });
});

View File

@@ -40,7 +40,10 @@ import {
HtmlDeltaConverter,
InlineDeltaToHtmlAdapterMatcherIdentifier,
} from './delta-converter';
import { rehypeInlineToBlock } from './rehype-plugins';
import {
rehypeInlineToBlock,
rehypeWrapInlineElements,
} from './rehype-plugins';
export type Html = string;
@@ -196,7 +199,10 @@ export class HtmlAdapter extends BaseAdapter<Html> {
}
/**
 * Converts an HTML string into a hast tree.
 *
 * The string is parsed with `rehypeParse`, then the tree is run synchronously
 * through `rehypeInlineToBlock` followed by `rehypeWrapInlineElements`.
 *
 * @param html - The HTML source to convert.
 * @returns The tree produced by running the registered plugins over the
 * parsed input.
 */
private _htmlToAst(html: Html) {
  // Declared exactly once: a second `const processor` in this scope is a
  // redeclaration error.
  const processor = unified()
    .use(rehypeParse)
    .use(rehypeInlineToBlock)
    .use(rehypeWrapInlineElements);
  const ast = processor.parse(html);
  return processor.runSync(ast);
}

View File

@@ -1 +1,2 @@
export * from './inline-to-block';
export * from './wrap-inline-element';

View File

@@ -2,28 +2,14 @@ import type { Root } from 'hast';
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
/**
 * Tag names this plugin inspects.
 * Content copied from Google Docs is wrapped in a <b> tag.
 * To handle this case, the <b> tag needs to be converted to a <div> tag.
 */
const inlineElements = new Set(['b']);
// Tag names matched against child elements when checking whether an
// inspected element has a block-level child.
const blockElements = new Set([
'div',
'p',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ul',
'ol',
'li',
'blockquote',
'pre',
]);
export const rehypeInlineToBlock: Plugin<[], Root> = () => {
return tree => {
visit(tree, 'element', node => {
@@ -31,7 +17,8 @@ export const rehypeInlineToBlock: Plugin<[], Root> = () => {
if (inlineElements.has(node.tagName)) {
// Check if the node has a block element child
const hasBlockChild = node.children.some(
child => child.type === 'element' && blockElements.has(child.tagName)
child =>
child.type === 'element' && HastUtils.isTagBlock(child.tagName)
);
if (hasBlockChild) {

View File

@@ -0,0 +1,79 @@
import type { Element, ElementContent, Root } from 'hast';
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
// HTML "ASCII whitespace" only. Deliberately not `String.prototype.trim`,
// which would also treat a non-breaking space as blank.
const ASCII_WHITESPACE_ONLY = /^[ \t\n\f\r]*$/;

/** True for nodes that carry no content: comments and whitespace-only text. */
const isIgnorableNode = (node: ElementContent): boolean =>
  node.type === 'comment' ||
  (node.type === 'text' && ASCII_WHITESPACE_ONLY.test(node.value));

/**
 * True when `child` is an element that must stay a direct child of the div.
 * The inline check runs first, so an element reported as inline is never
 * treated as a block, whatever its tag name.
 */
const isBlockChild = (child: ElementContent): boolean =>
  child.type === 'element' &&
  !HastUtils.isElementInline(child) &&
  HastUtils.isTagBlock(child.tagName);

/** True when `parent` has at least one inline and one block element child. */
const hasInlineAndBlockChildren = (parent: Element): boolean => {
  let hasInline = false;
  let hasBlock = false;
  for (const child of parent.children) {
    if (child.type !== 'element') continue;
    if (HastUtils.isElementInline(child)) {
      hasInline = true;
    } else if (HastUtils.isTagBlock(child.tagName)) {
      hasBlock = true;
    }
    if (hasInline && hasBlock) return true;
  }
  return false;
};

/** Builds a `<p>` element that owns `children`. */
const createParagraph = (children: ElementContent[]): Element => ({
  type: 'element',
  tagName: 'p',
  properties: {},
  children,
});

/**
 * Rehype plugin for `<div>` elements whose direct children mix inline and
 * block elements.
 *
 * Every consecutive run of non-block children (inline elements, elements that
 * are neither inline nor block, text and comments) is wrapped in a new `<p>`,
 * so the run is no longer a set of loose siblings of the block elements.
 * Block children stay in place. A `<div>` without both an inline and a block
 * element child is left untouched.
 *
 * A run made up only of whitespace text and/or comments (for example the line
 * breaks and indentation between tags in pretty-printed HTML) is kept as is:
 * wrapping it would create a paragraph with no content.
 */
export const rehypeWrapInlineElements: Plugin<[], Root> = () => {
  return tree => {
    visit(tree, 'element', (node: Element) => {
      if (node.tagName !== 'div' || !hasInlineAndBlockChildren(node)) {
        return;
      }

      const regrouped: ElementContent[] = [];
      let inlineRun: ElementContent[] = [];

      // Moves the pending run into the output, wrapped in a <p> unless it
      // holds nothing but ignorable nodes.
      const flushInlineRun = () => {
        if (inlineRun.length === 0) return;
        if (inlineRun.every(isIgnorableNode)) {
          regrouped.push(...inlineRun);
        } else {
          regrouped.push(createParagraph(inlineRun));
        }
        inlineRun = [];
      };

      for (const child of node.children) {
        if (isBlockChild(child)) {
          // A block element ends the current run and is kept unchanged.
          flushInlineRun();
          regrouped.push(child);
        } else {
          // Inline elements, unknown elements, text and comments all join
          // the current run.
          inlineRun.push(child);
        }
      }
      // Emit whatever is left after the last block element.
      flushInlineRun();

      node.children = regrouped;
    });
  };
};

View File

@@ -2,6 +2,86 @@ import type { Element, ElementContent, Text } from 'hast';
import type { HtmlAST } from '../types/hast.js';
// Tag names of the block elements that the html adapter supports.
const blockElements: string[] = [
  // Generic container and paragraph
  'div',
  'p',
  // Headings
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  // Lists and list items
  'ul',
  'ol',
  'li',
  // Quotations and preformatted text
  'blockquote',
  'pre',
];
// Set view of the list above for constant-time membership checks.
const blockElementsSet: Set<string> = new Set<string>(blockElements);
// Tag names classified as inline (HTML "phrasing content").
const inlineElements: string[] = [
  'a',
  'abbr',
  'audio',
  'b',
  'bdi',
  'bdo',
  'br',
  'button',
  'canvas',
  'cite',
  'code',
  'data',
  'datalist',
  'del',
  'dfn',
  'em',
  'embed',
  'i',
  // 'iframe' is not included because it needs special handling
  // 'img' is not included because it needs special handling
  'input',
  'ins',
  'kbd',
  'label',
  'link',
  'map',
  'mark',
  'math',
  'meta',
  'meter',
  'noscript',
  'object',
  'output',
  'picture',
  'progress',
  'q',
  'ruby',
  's',
  'samp',
  'script',
  'select',
  'slot',
  'small',
  'span',
  'strong',
  'sub',
  'sup',
  'svg',
  'template',
  'textarea',
  'time',
  'u',
  'var',
  'video',
  'wbr',
];
// Set view of the list above for constant-time membership checks.
const inlineElementsSet: Set<string> = new Set<string>(inlineElements);
/** Type guard that narrows an AST node to `Element` when its `type` is `'element'`. */
const isElement = (ast: HtmlAST): ast is Element => ast.type === 'element';
@@ -53,66 +133,12 @@ const getTextChildrenOnlyAst = (ast: Element): Element => {
};
};
/**
 * Reports whether `tagName` is one of the block element tags the html adapter
 * supports. The comparison is an exact string match.
 */
const isTagBlock = (tagName: string): boolean => blockElementsSet.has(tagName);
/**
 * Reports whether `tagName` is classified as an inline (phrasing content) tag.
 *
 * The lookup uses the module-level `inlineElementsSet`, so the tag list is
 * built once rather than on every call. `iframe` and `img` are intentionally
 * absent from that list and therefore return `false` here.
 *
 * @param tagName - Tag name to test; compared as an exact string match.
 * @returns `true` when the tag is in the inline tag list.
 */
const isTagInline = (tagName: string): boolean => {
  return inlineElementsSet.has(tagName);
};
const isElementInline = (element: Element): boolean => {
@@ -263,4 +289,7 @@ export const HastUtils = {
querySelector,
flatNodes,
isParagraphLike,
isTagBlock,
isTagInline,
isElementInline,
};