fix(editor): wrap inline elements in a p tag to avoid them being treated as paragraph blocks when importing html (#12389)

This commit is contained in:
donteatfriedrice
2025-05-21 02:45:17 +00:00
parent 1a070367f3
commit bfbbc2342e
6 changed files with 235 additions and 78 deletions

View File

@@ -0,0 +1,55 @@
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
import { unified } from 'unified';
import { describe, expect, it } from 'vitest';
import { rehypeWrapInlineElements } from '../../../../adapters/html/rehype-plugins/wrap-inline-element';
describe('rehypeWrapInlineElements', () => {
  /**
   * Parses `html` as a fragment, applies the plugin under test and
   * serializes the resulting tree back to an HTML string.
   */
  const transform = (html: string) =>
    unified()
      .use(rehypeParse, { fragment: true })
      .use(rehypeWrapInlineElements)
      .use(rehypeStringify)
      .processSync(html)
      .toString();

  // Each entry is one test: its title, the HTML fed in, and the exact HTML expected out.
  const cases = [
    {
      title:
        'should not wrap inline elements without block children in a div tag',
      input: '<div><span>Hello World</span></div>',
      expected: '<div><span>Hello World</span></div>',
    },
    {
      title: 'should not wrap elements without inline children in a div tag',
      input: '<div><h1>Hello World</h1></div>',
      expected: '<div><h1>Hello World</h1></div>',
    },
    {
      title: 'should wrap inline elements containing block children in a p tag',
      input: '<div><p>Hello World</p><span>Hello World</span></div>',
      expected: '<div><p>Hello World</p><p><span>Hello World</span></p></div>',
    },
    {
      title: 'should wrap inline elements sequentially',
      input:
        '<div><p>Hello World</p><span>Hello</span><span>World</span></div>',
      expected:
        '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p></div>',
    },
    {
      title:
        'should wrap inline elements sequentially mixed with block elements',
      input:
        '<div><p>Hello World</p><span>Hello</span><span>World</span><h1>Title</h1><span>Hello</span><span>World</span></div>',
      expected:
        '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p><h1>Title</h1><p><span>Hello</span><span>World</span></p></div>',
    },
  ];

  for (const { title, input, expected } of cases) {
    it(title, () => {
      expect(transform(input)).toBe(expected);
    });
  }
});

View File

@@ -40,7 +40,10 @@ import {
HtmlDeltaConverter, HtmlDeltaConverter,
InlineDeltaToHtmlAdapterMatcherIdentifier, InlineDeltaToHtmlAdapterMatcherIdentifier,
} from './delta-converter'; } from './delta-converter';
import { rehypeInlineToBlock } from './rehype-plugins'; import {
rehypeInlineToBlock,
rehypeWrapInlineElements,
} from './rehype-plugins';
export type Html = string; export type Html = string;
@@ -196,7 +199,10 @@ export class HtmlAdapter extends BaseAdapter<Html> {
} }
private _htmlToAst(html: Html) { private _htmlToAst(html: Html) {
const processor = unified().use(rehypeParse).use(rehypeInlineToBlock); const processor = unified()
.use(rehypeParse)
.use(rehypeInlineToBlock)
.use(rehypeWrapInlineElements);
const ast = processor.parse(html); const ast = processor.parse(html);
return processor.runSync(ast); return processor.runSync(ast);
} }

View File

@@ -1 +1,2 @@
export * from './inline-to-block'; export * from './inline-to-block';
export * from './wrap-inline-element';

View File

@@ -2,28 +2,14 @@ import type { Root } from 'hast';
import type { Plugin } from 'unified'; import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit'; import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
/** /**
* The content copied from google docs will be wrapped in <b> tag * The content copied from google docs will be wrapped in <b> tag
* To handle this case, we need to convert the <b> tag to a <div> tag * To handle this case, we need to convert the <b> tag to a <div> tag
*/ */
const inlineElements = new Set(['b']); const inlineElements = new Set(['b']);
const blockElements = new Set([
'div',
'p',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ul',
'ol',
'li',
'blockquote',
'pre',
]);
export const rehypeInlineToBlock: Plugin<[], Root> = () => { export const rehypeInlineToBlock: Plugin<[], Root> = () => {
return tree => { return tree => {
visit(tree, 'element', node => { visit(tree, 'element', node => {
@@ -31,7 +17,8 @@ export const rehypeInlineToBlock: Plugin<[], Root> = () => {
if (inlineElements.has(node.tagName)) { if (inlineElements.has(node.tagName)) {
// Check if the node has a block element child // Check if the node has a block element child
const hasBlockChild = node.children.some( const hasBlockChild = node.children.some(
child => child.type === 'element' && blockElements.has(child.tagName) child =>
child.type === 'element' && HastUtils.isTagBlock(child.tagName)
); );
if (hasBlockChild) { if (hasBlockChild) {

View File

@@ -0,0 +1,79 @@
import type { Element, ElementContent, Root } from 'hast';
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
/**
 * Matches text consisting solely of HTML ASCII whitespace (or nothing at all).
 * Deliberately narrower than `String.prototype.trim`, which would also strip
 * non-breaking spaces that do render.
 */
const WHITESPACE_ONLY_TEXT = /^[ \t\n\f\r]*$/;

/**
 * Tells whether a run of inline content holds anything that deserves its own
 * paragraph: at least one element, or a text node that is not pure whitespace.
 * Comments and whitespace-only text do not count.
 */
const hasMeaningfulContent = (group: ElementContent[]): boolean =>
  group.some(
    node =>
      node.type === 'element' ||
      (node.type === 'text' && !WHITESPACE_ONLY_TEXT.test(node.value))
  );

/**
 * Appends an accumulated run of inline content to `target`.
 *
 * A run with meaningful content is wrapped in a new `<p>` element. A run made
 * only of whitespace text and/or comments (typical for indented, pretty-printed
 * HTML) is appended unwrapped so that it does not become an empty paragraph.
 * An empty run is ignored.
 */
const flushInlineGroup = (
  group: ElementContent[],
  target: ElementContent[]
): void => {
  if (group.length === 0) return;

  if (!hasMeaningfulContent(group)) {
    target.push(...group);
    return;
  }

  target.push({
    type: 'element',
    tagName: 'p',
    properties: {},
    children: group,
  });
};

/**
 * In some cases, inline elements sit directly inside a div next to block elements.
 * Each consecutive run of inline content is wrapped in a p tag so that the inline
 * elements are not treated as separate blocks.
 *
 * Only `div` elements whose element children include at least one inline element
 * and at least one block element are rewritten; every other node is left as is.
 * Text nodes and elements that are neither inline nor block join the current
 * inline run, but they do not by themselves trigger the rewrite.
 */
export const rehypeWrapInlineElements: Plugin<[], Root> = () => {
  return tree => {
    visit(tree, 'element', (node: Element) => {
      if (node.tagName !== 'div') return;

      // First pass: is there a mix of inline and block element children?
      let hasInline = false;
      let hasBlock = false;
      for (const child of node.children) {
        if (child.type !== 'element') continue;
        if (HastUtils.isElementInline(child)) {
          hasInline = true;
        } else if (HastUtils.isTagBlock(child.tagName)) {
          hasBlock = true;
        }
        if (hasInline && hasBlock) break;
      }
      if (!hasInline || !hasBlock) return;

      // Second pass: block children act as separators; everything between
      // two separators forms one inline run.
      const newChildren: ElementContent[] = [];
      let currentInlineGroup: ElementContent[] = [];
      for (const child of node.children) {
        // The inline check takes precedence over the block check.
        const isBlock =
          child.type === 'element' &&
          !HastUtils.isElementInline(child) &&
          HastUtils.isTagBlock(child.tagName);

        if (isBlock) {
          flushInlineGroup(currentInlineGroup, newChildren);
          currentInlineGroup = [];
          newChildren.push(child);
        } else {
          // Inline elements, unknown elements, text and comments all join the run.
          currentInlineGroup.push(child);
        }
      }
      // Flush whatever inline content trails the last block element.
      flushInlineGroup(currentInlineGroup, newChildren);

      // Replace the original children with the new structure.
      node.children = newChildren;
    });
  };
};

View File

@@ -2,6 +2,86 @@ import type { Element, ElementContent, Text } from 'hast';
import type { HtmlAST } from '../types/hast.js'; import type { HtmlAST } from '../types/hast.js';
/**
 * Tag names of the block elements that the html adapter supports.
 */
const blockElements: string[] = [
  // Generic container and paragraph
  'div',
  'p',
  // Headings
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  // Lists
  'ul',
  'ol',
  'li',
  // Quotes and preformatted text
  'blockquote',
  'pre',
];

/** Set view of `blockElements` for membership checks. */
const blockElementsSet = new Set<string>(blockElements);
/**
 * Tag names treated as inline (phrasing content), listed alphabetically.
 */
const inlineElements: string[] = [
  'a', 'abbr', 'audio',
  'b', 'bdi', 'bdo', 'br', 'button',
  'canvas', 'cite', 'code',
  'data', 'datalist', 'del', 'dfn',
  'em', 'embed',
  'i',
  // 'iframe' is not included because it needs special handling
  // 'img' is not included because it needs special handling
  'input', 'ins',
  'kbd',
  'label', 'link',
  'map', 'mark', 'math', 'meta', 'meter',
  'noscript',
  'object', 'output',
  'picture', 'progress',
  'q',
  'ruby',
  's', 'samp', 'script', 'select', 'slot', 'small', 'span', 'strong', 'sub', 'sup', 'svg',
  'template', 'textarea', 'time',
  'u',
  'var', 'video',
  'wbr',
];

/** Set view of `inlineElements` for membership checks. */
const inlineElementsSet = new Set<string>(inlineElements);
const isElement = (ast: HtmlAST): ast is Element => { const isElement = (ast: HtmlAST): ast is Element => {
return ast.type === 'element'; return ast.type === 'element';
}; };
@@ -53,66 +133,12 @@ const getTextChildrenOnlyAst = (ast: Element): Element => {
}; };
}; };
/**
 * Reports whether `tagName` is one of the block element tags held in
 * `blockElementsSet`. The lookup is an exact string match.
 */
const isTagBlock = (tagName: string): boolean => blockElementsSet.has(tagName);
const isTagInline = (tagName: string): boolean => { const isTagInline = (tagName: string): boolean => {
// Phrasing content return inlineElementsSet.has(tagName);
const inlineElements = [
'a',
'abbr',
'audio',
'b',
'bdi',
'bdo',
'br',
'button',
'canvas',
'cite',
'code',
'data',
'datalist',
'del',
'dfn',
'em',
'embed',
'i',
// 'iframe' is not included because it needs special handling
// 'img' is not included because it needs special handling
'input',
'ins',
'kbd',
'label',
'link',
'map',
'mark',
'math',
'meta',
'meter',
'noscript',
'object',
'output',
'picture',
'progress',
'q',
'ruby',
's',
'samp',
'script',
'select',
'slot',
'small',
'span',
'strong',
'sub',
'sup',
'svg',
'template',
'textarea',
'time',
'u',
'var',
'video',
'wbr',
];
return inlineElements.includes(tagName);
}; };
const isElementInline = (element: Element): boolean => { const isElementInline = (element: Element): boolean => {
@@ -263,4 +289,7 @@ export const HastUtils = {
querySelector, querySelector,
flatNodes, flatNodes,
isParagraphLike, isParagraphLike,
isTagBlock,
isTagInline,
isElementInline,
}; };