fix(editor): wrap inline elements in a p tag to avoid them being treated as paragraph blocks when importing html (#12389)

This commit is contained in:
donteatfriedrice
2025-05-21 02:45:17 +00:00
parent 1a070367f3
commit bfbbc2342e
6 changed files with 235 additions and 78 deletions

View File

@@ -0,0 +1,55 @@
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
import { unified } from 'unified';
import { describe, expect, it } from 'vitest';
import { rehypeWrapInlineElements } from '../../../../adapters/html/rehype-plugins/wrap-inline-element';
/**
 * Specs for `rehypeWrapInlineElements`.
 *
 * Each case feeds an HTML fragment through rehype-parse, the plugin under
 * test and rehype-stringify, then compares the serialised result with the
 * expected markup.
 */
describe('rehypeWrapInlineElements', () => {
  // Run a fragment through parse -> plugin -> stringify and return the HTML.
  const renderWithPlugin = (html: string): string => {
    const pipeline = unified()
      .use(rehypeParse, { fragment: true })
      .use(rehypeWrapInlineElements)
      .use(rehypeStringify);
    const file = pipeline.processSync(html);
    return file.toString();
  };

  const cases: ReadonlyArray<{
    title: string;
    input: string;
    expected: string;
  }> = [
    {
      // Only inline children: nothing to separate, markup is left alone.
      title:
        'should not wrap inline elements without block children in a div tag',
      input: '<div><span>Hello World</span></div>',
      expected: '<div><span>Hello World</span></div>',
    },
    {
      // Only block children: nothing to wrap, markup is left alone.
      title: 'should not wrap elements without inline children in a div tag',
      input: '<div><h1>Hello World</h1></div>',
      expected: '<div><h1>Hello World</h1></div>',
    },
    {
      // A block sibling is present, so the trailing span gets its own <p>.
      title: 'should wrap inline elements containing block children in a p tag',
      input: '<div><p>Hello World</p><span>Hello World</span></div>',
      expected: '<div><p>Hello World</p><p><span>Hello World</span></p></div>',
    },
    {
      // Adjacent inline siblings share a single wrapping <p>.
      title: 'should wrap inline elements sequentially',
      input: '<div><p>Hello World</p><span>Hello</span><span>World</span></div>',
      expected:
        '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p></div>',
    },
    {
      // A block element in the middle splits the inline siblings into two runs.
      title:
        'should wrap inline elements sequentially mixed with block elements',
      input:
        '<div><p>Hello World</p><span>Hello</span><span>World</span><h1>Title</h1><span>Hello</span><span>World</span></div>',
      expected:
        '<div><p>Hello World</p><p><span>Hello</span><span>World</span></p><h1>Title</h1><p><span>Hello</span><span>World</span></p></div>',
    },
  ];

  for (const { title, input, expected } of cases) {
    it(title, () => {
      expect(renderWithPlugin(input)).toBe(expected);
    });
  }
});

View File

@@ -40,7 +40,10 @@ import {
HtmlDeltaConverter, HtmlDeltaConverter,
InlineDeltaToHtmlAdapterMatcherIdentifier, InlineDeltaToHtmlAdapterMatcherIdentifier,
} from './delta-converter'; } from './delta-converter';
import { rehypeInlineToBlock } from './rehype-plugins'; import {
rehypeInlineToBlock,
rehypeWrapInlineElements,
} from './rehype-plugins';
export type Html = string; export type Html = string;
@@ -196,7 +199,10 @@ export class HtmlAdapter extends BaseAdapter<Html> {
} }
private _htmlToAst(html: Html) { private _htmlToAst(html: Html) {
const processor = unified().use(rehypeParse).use(rehypeInlineToBlock); const processor = unified()
.use(rehypeParse)
.use(rehypeInlineToBlock)
.use(rehypeWrapInlineElements);
const ast = processor.parse(html); const ast = processor.parse(html);
return processor.runSync(ast); return processor.runSync(ast);
} }

View File

@@ -1 +1,2 @@
export * from './inline-to-block'; export * from './inline-to-block';
export * from './wrap-inline-element';

View File

@@ -2,28 +2,14 @@ import type { Root } from 'hast';
import type { Plugin } from 'unified'; import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit'; import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
/** /**
* The content copied from google docs will be wrapped in <b> tag * The content copied from google docs will be wrapped in <b> tag
* To handle this case, we need to convert the <b> tag to a <div> tag * To handle this case, we need to convert the <b> tag to a <div> tag
*/ */
const inlineElements = new Set(['b']); const inlineElements = new Set(['b']);
const blockElements = new Set([
'div',
'p',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ul',
'ol',
'li',
'blockquote',
'pre',
]);
export const rehypeInlineToBlock: Plugin<[], Root> = () => { export const rehypeInlineToBlock: Plugin<[], Root> = () => {
return tree => { return tree => {
visit(tree, 'element', node => { visit(tree, 'element', node => {
@@ -31,7 +17,8 @@ export const rehypeInlineToBlock: Plugin<[], Root> = () => {
if (inlineElements.has(node.tagName)) { if (inlineElements.has(node.tagName)) {
// Check if the node has a block element child // Check if the node has a block element child
const hasBlockChild = node.children.some( const hasBlockChild = node.children.some(
child => child.type === 'element' && blockElements.has(child.tagName) child =>
child.type === 'element' && HastUtils.isTagBlock(child.tagName)
); );
if (hasBlockChild) { if (hasBlockChild) {

View File

@@ -0,0 +1,79 @@
import type { Element, ElementContent, Root } from 'hast';
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
import { HastUtils } from '../../utils/hast';
/** Matches text consisting solely of HTML ASCII whitespace (or nothing). */
const HTML_WHITESPACE_ONLY = /^[\t\n\f\r ]*$/;

/**
 * Whether a run of inline nodes contains anything that warrants a paragraph:
 * at least one element, or a text node with a non-whitespace character.
 * Runs made only of whitespace text and/or comments do not qualify.
 */
const hasVisibleContent = (nodes: ElementContent[]): boolean =>
  nodes.some(
    node =>
      node.type === 'element' ||
      (node.type === 'text' && !HTML_WHITESPACE_ONLY.test(node.value))
  );

/**
 * Move a collected inline run into `target`.
 *
 * A run with visible content is wrapped in a fresh `<p>` element. A run that
 * only holds whitespace text or comments (e.g. the line breaks between block
 * tags in pretty-printed HTML) is appended unwrapped, so it does not turn
 * into an empty paragraph. An empty run is a no-op.
 */
const flushInlineGroup = (
  group: ElementContent[],
  target: ElementContent[]
): void => {
  if (group.length === 0) {
    return;
  }
  if (hasVisibleContent(group)) {
    target.push({
      type: 'element',
      tagName: 'p',
      properties: {},
      children: group,
    });
  } else {
    target.push(...group);
  }
};

/**
 * Rehype plugin: inside a `<div>` that directly contains both inline and
 * block elements, wrap each consecutive run of inline content in a `<p>`.
 *
 * This keeps inline siblings that sit next to block elements together in a
 * single paragraph instead of leaving them as loose children of the div.
 *
 * Rules, applied to the direct children of every visited `<div>`:
 * - The div is only rewritten when at least one child element satisfies
 *   `HastUtils.isElementInline` and at least one other child element has a
 *   tag accepted by `HastUtils.isTagBlock`. Text nodes do not count towards
 *   this detection.
 * - Block children are kept as they are and terminate the current run.
 * - Every other child (inline elements, elements that are neither inline nor
 *   a known block tag, text, comments) joins the current run.
 * - A run is wrapped in `<p>` only if it has visible content; whitespace-only
 *   or comment-only runs stay in place unwrapped.
 *
 * The tree is mutated in place by replacing `node.children`.
 */
export const rehypeWrapInlineElements: Plugin<[], Root> = () => {
  return tree => {
    visit(tree, 'element', (node: Element) => {
      if (node.tagName !== 'div') {
        return;
      }

      // First pass: is this div a mix of inline and block child elements?
      let hasInline = false;
      let hasBlock = false;
      for (const child of node.children) {
        if (child.type !== 'element') {
          continue;
        }
        if (HastUtils.isElementInline(child)) {
          hasInline = true;
        } else if (HastUtils.isTagBlock(child.tagName)) {
          hasBlock = true;
        }
        if (hasInline && hasBlock) {
          break;
        }
      }
      if (!hasInline || !hasBlock) {
        return;
      }

      // Second pass: rebuild the child list, grouping inline runs.
      const newChildren: ElementContent[] = [];
      let currentInlineGroup: ElementContent[] = [];
      for (const child of node.children) {
        // The inline check comes first, so an element that passes it is
        // treated as inline even if its tag is also a known block tag.
        const isBlock =
          child.type === 'element' &&
          !HastUtils.isElementInline(child) &&
          HastUtils.isTagBlock(child.tagName);

        if (isBlock) {
          // A block element closes the run collected so far.
          flushInlineGroup(currentInlineGroup, newChildren);
          currentInlineGroup = [];
          newChildren.push(child);
        } else {
          // Inline elements, unknown elements, text and comments.
          currentInlineGroup.push(child);
        }
      }
      // Trailing inline content after the last block element.
      flushInlineGroup(currentInlineGroup, newChildren);

      node.children = newChildren;
    });
  };
};

View File

@@ -2,60 +2,27 @@ import type { Element, ElementContent, Text } from 'hast';
import type { HtmlAST } from '../types/hast.js'; import type { HtmlAST } from '../types/hast.js';
const isElement = (ast: HtmlAST): ast is Element => { // Block elements that html adapter supports
return ast.type === 'element'; const blockElements = [
}; 'div',
'p',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ul',
'ol',
'li',
'blockquote',
'pre',
];
const getTextContent = (ast: HtmlAST | undefined, defaultStr = ''): string => { const blockElementsSet = new Set(blockElements);
if (!ast) {
return defaultStr;
}
switch (ast.type) {
case 'text': {
return ast.value.replace(/\s+/g, ' ');
}
case 'element': {
switch (ast.tagName) {
case 'br': {
return '\n';
}
}
return ast.children.map(child => getTextContent(child)).join('');
}
}
return defaultStr;
};
const getElementChildren = (ast: HtmlAST | undefined): Element[] => { // Phrasing content
if (!ast) { const inlineElements = [
return [];
}
if (ast.type === 'element') {
return ast.children.filter(child => child.type === 'element') as Element[];
}
return [];
};
const getTextChildren = (ast: HtmlAST | undefined): Text[] => {
if (!ast) {
return [];
}
if (ast.type === 'element') {
return ast.children.filter(child => child.type === 'text') as Text[];
}
return [];
};
const getTextChildrenOnlyAst = (ast: Element): Element => {
return {
...ast,
children: getTextChildren(ast),
};
};
const isTagInline = (tagName: string): boolean => {
// Phrasing content
const inlineElements = [
'a', 'a',
'abbr', 'abbr',
'audio', 'audio',
@@ -111,8 +78,67 @@ const isTagInline = (tagName: string): boolean => {
'var', 'var',
'video', 'video',
'wbr', 'wbr',
]; ];
return inlineElements.includes(tagName);
const inlineElementsSet = new Set(inlineElements);
const isElement = (ast: HtmlAST): ast is Element => {
return ast.type === 'element';
};
const getTextContent = (ast: HtmlAST | undefined, defaultStr = ''): string => {
if (!ast) {
return defaultStr;
}
switch (ast.type) {
case 'text': {
return ast.value.replace(/\s+/g, ' ');
}
case 'element': {
switch (ast.tagName) {
case 'br': {
return '\n';
}
}
return ast.children.map(child => getTextContent(child)).join('');
}
}
return defaultStr;
};
const getElementChildren = (ast: HtmlAST | undefined): Element[] => {
if (!ast) {
return [];
}
if (ast.type === 'element') {
return ast.children.filter(child => child.type === 'element') as Element[];
}
return [];
};
const getTextChildren = (ast: HtmlAST | undefined): Text[] => {
if (!ast) {
return [];
}
if (ast.type === 'element') {
return ast.children.filter(child => child.type === 'text') as Text[];
}
return [];
};
const getTextChildrenOnlyAst = (ast: Element): Element => {
return {
...ast,
children: getTextChildren(ast),
};
};
const isTagBlock = (tagName: string): boolean => {
return blockElementsSet.has(tagName);
};
const isTagInline = (tagName: string): boolean => {
return inlineElementsSet.has(tagName);
}; };
const isElementInline = (element: Element): boolean => { const isElementInline = (element: Element): boolean => {
@@ -263,4 +289,7 @@ export const HastUtils = {
querySelector, querySelector,
flatNodes, flatNodes,
isParagraphLike, isParagraphLike,
isTagBlock,
isTagInline,
isElementInline,
}; };