feat(editor): import docs from docx (#11774)

Support importing .docx files, as mentioned in
https://github.com/toeverything/AFFiNE/issues/10154#issuecomment-2655744757

It uses mammoth to convert the .docx file to HTML, and then imports
that HTML through the existing HTML import path.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Import Microsoft Word (.docx) files directly via the import dialog
(creates new documents).
* .docx added as a selectable file type in the file picker and import
options.

* **Localization**
* Added localized labels and tooltips for DOCX import in English,
Simplified Chinese, and Traditional Chinese.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com>
Co-authored-by: DarkSky <darksky2048@gmail.com>
This commit is contained in:
Xun Sun
2025-11-15 15:51:23 +08:00
committed by GitHub
parent e5db0e66c1
commit 17ec76540b
14 changed files with 195 additions and 15 deletions
+1 -1
View File
@@ -46,7 +46,7 @@
"@preact/signals-core": "^1.8.0",
"@toeverything/theme": "^1.1.16",
"@types/lodash-es": "^4.17.12",
"dompurify": "^3.2.4",
"dompurify": "^3.3.0",
"html2canvas": "^1.4.1",
"lit": "^3.2.0",
"lodash-es": "^4.17.21",
+1 -1
View File
@@ -24,7 +24,7 @@
"@types/lodash-es": "^4.17.12",
"@types/mdast": "^4.0.4",
"bytes": "^3.1.2",
"dompurify": "^3.2.4",
"dompurify": "^3.3.0",
"fractional-indexing": "^3.2.0",
"lit": "^3.2.0",
"lodash-es": "^4.17.21",
@@ -20,6 +20,7 @@ import {
type ToDocSnapshotPayload,
type Transformer,
} from '@blocksuite/store';
import DOMPurify from 'dompurify';
import type { Root } from 'hast';
import rehypeParse from 'rehype-parse';
import rehypeStringify from 'rehype-stringify';
@@ -297,7 +298,8 @@ export class HtmlAdapter extends BaseAdapter<Html> {
override async toDocSnapshot(
payload: ToDocSnapshotPayload<string>
): Promise<DocSnapshot> {
const htmlAst = this._htmlToAst(payload.file);
const sanitized = DOMPurify.sanitize(payload.file);
const htmlAst = this._htmlToAst(sanitized);
const titleAst = HastUtils.querySelector(htmlAst, 'title');
const blockSnapshotRoot = {
type: 'block',
@@ -92,6 +92,13 @@ const FileTypes: NonNullable<OpenFilePickerOptions['types']> = [
'application/zip': ['.zip'],
},
},
{
description: 'Docx',
accept: {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
['.docx'],
},
},
{
description: 'MindMap',
accept: {
@@ -111,6 +118,7 @@ type AcceptTypes =
| 'Markdown'
| 'Html'
| 'Zip'
| 'Docx'
| 'MindMap';
export async function openFilesWith(
@@ -28,6 +28,7 @@
"fflate": "^0.8.2",
"lit": "^3.2.0",
"lodash-es": "^4.17.21",
"mammoth": "^1.11.0",
"rxjs": "^7.8.1",
"zod": "^3.23.8"
},
@@ -0,0 +1,47 @@
import type { ExtensionType, Schema, Workspace } from '@blocksuite/store';
// @ts-ignore
import { convertToHtml } from 'mammoth/mammoth.browser';
import { HtmlTransformer } from './html';
type ImportDocxOptions = {
collection: Workspace;
schema: Schema;
imported: Blob;
extensions: ExtensionType[];
};
/**
 * Converts a .docx Blob to HTML with mammoth and hands that HTML to
 * `HtmlTransformer.importHTMLToDoc`.
 *
 * Any error thrown during conversion or import is logged to the console and
 * reported to the caller as `undefined` instead of being rethrown.
 *
 * @param options - The import options.
 * @param options.collection - The target doc collection.
 * @param options.schema - The schema of the target doc collection.
 * @param options.imported - The .docx file as a Blob.
 * @param options.extensions - Extensions passed through to the HTML import.
 * @returns A Promise that resolves to the ID of the newly created doc, or undefined if import fails.
 */
async function importDocx(options: ImportDocxOptions) {
  const { collection, schema, imported, extensions } = options;
  try {
    // mammoth's browser build takes the raw bytes of the document.
    const docxBytes = await imported.arrayBuffer();
    const conversion = await convertToHtml({ arrayBuffer: docxBytes });
    // `value` holds the generated HTML string.
    const docId = await HtmlTransformer.importHTMLToDoc({
      collection,
      schema,
      html: conversion.value,
      extensions,
    });
    return docId;
  } catch (e) {
    console.error('Failed to import .docx file:', e);
    return undefined;
  }
}
/**
 * Public entry point for .docx handling; currently exposes import only.
 */
export const DocxTransformer = {
  importDocx: importDocx,
};
@@ -1,3 +1,4 @@
export { DocxTransformer } from './docx.js';
export { HtmlTransformer } from './html.js';
export { MarkdownTransformer } from './markdown.js';
export { NotionHtmlTransformer } from './notion-html.js';
+1 -1
View File
@@ -19,7 +19,7 @@
"@preact/signals-core": "^1.8.0",
"@types/hast": "^3.0.4",
"@types/lodash-es": "^4.17.12",
"dompurify": "^3.2.4",
"dompurify": "^3.3.0",
"fractional-indexing": "^3.2.0",
"lib0": "^0.2.97",
"lit": "^3.2.0",
@@ -22,6 +22,7 @@ import track from '@affine/track';
import { openFilesWith } from '@blocksuite/affine/shared/utils';
import type { Workspace } from '@blocksuite/affine/store';
import {
DocxTransformer,
HtmlTransformer,
MarkdownTransformer,
NotionHtmlTransformer,
@@ -30,6 +31,7 @@ import {
import {
ExportToHtmlIcon,
ExportToMarkdownIcon,
FileIcon,
HelpIcon,
NotionIcon,
PageIcon,
@@ -186,8 +188,9 @@ type ImportType =
| 'notion'
| 'snapshot'
| 'html'
| 'docx'
| 'dotaffinefile';
type AcceptType = 'Markdown' | 'Zip' | 'Html' | 'Skip'; // Skip is used for dotaffinefile
type AcceptType = 'Markdown' | 'Zip' | 'Html' | 'Docx' | 'Skip'; // Skip is used for dotaffinefile
type Status = 'idle' | 'importing' | 'success' | 'error';
type ImportResult = {
docIds: string[];
@@ -262,6 +265,17 @@ const importOptions = [
testId: 'editor-option-menu-import-notion',
type: 'notion' as ImportType,
},
{
key: 'docx',
label: 'com.affine.import.docx',
prefixIcon: <FileIcon color={cssVar('black')} width={20} height={20} />,
suffixIcon: (
<HelpIcon color={cssVarV2('icon/primary')} width={20} height={20} />
),
suffixTooltip: 'com.affine.import.docx.tooltip',
testId: 'editor-option-menu-import-docx',
type: 'docx' as ImportType,
},
{
key: 'snapshot',
label: 'com.affine.import.snapshot',
@@ -432,6 +446,23 @@ const importConfigs: Record<ImportType, ImportConfig> = {
};
},
},
docx: {
  fileOptions: { acceptType: 'Docx', multiple: false },
  importFunction: async (docCollection, file) => {
    // Normalize to a list so a single file and an array share one code path.
    const docxFiles = Array.isArray(file) ? file : [file];
    const docIds: string[] = [];
    // Import sequentially; a falsy result (failed import) contributes no id.
    for (const docxFile of docxFiles) {
      const importedId = await DocxTransformer.importDocx({
        collection: docCollection,
        schema: getAFFiNEWorkspaceSchema(),
        imported: docxFile,
        extensions: getStoreManager().config.init().value.get('store'),
      });
      if (importedId) {
        docIds.push(importedId);
      }
    }
    return { docIds };
  },
},
snapshot: {
fileOptions: { acceptType: 'Zip', multiple: false },
importFunction: async (
+8
View File
@@ -2411,6 +2411,14 @@ export function useAFFiNEI18N(): {
* `AFFiNE workspace data`
*/
["com.affine.import.affine-workspace-data"](): string;
/**
* `Docx`
*/
["com.affine.import.docx"](): string;
/**
* `Import your .docx file.`
*/
["com.affine.import.docx.tooltip"](): string;
/**
* `HTML`
*/
@@ -602,6 +602,8 @@
"com.affine.import-clipper.dialog.errorLoad": "Failed to load content, please try again.",
"com.affine.import_file": "Support Markdown/Notion",
"com.affine.import.affine-workspace-data": "AFFiNE workspace data",
"com.affine.import.docx": "Docx",
"com.affine.import.docx.tooltip": "Import your .docx file.",
"com.affine.import.html-files": "HTML",
"com.affine.import.html-files.tooltip": "This is an experimental feature that is not perfect and may cause your data to be missing after import.",
"com.affine.import.markdown-files": "Markdown files (.md)",
@@ -592,6 +592,8 @@
"com.affine.import-clipper.dialog.errorLoad": "读取内容失败,请重试。",
"com.affine.import_file": "支持 Markdown/Notion",
"com.affine.import.affine-workspace-data": "AFFiNE 工作区数据",
"com.affine.import.docx": "Docx",
"com.affine.import.docx.tooltip": "导入您的 Microsoft Office Word 文档。",
"com.affine.import.html-files": "HTML",
"com.affine.import.html-files.tooltip": "这是一个实验性功能,可能不完全完美,导入后可能会导致数据丢失。",
"com.affine.import.markdown-files": "Markdown 文件 (.md)",
@@ -592,6 +592,8 @@
"com.affine.import-clipper.dialog.errorLoad": "加載內容失敗,請重試。",
"com.affine.import_file": "支援 Markdown/Notion",
"com.affine.import.affine-workspace-data": "AFFiNE 工作區數據",
"com.affine.import.docx": "Docx",
"com.affine.import.docx.tooltip": "導入您的 Microsoft Office Word 文件。",
"com.affine.import.html-files": "HTML",
"com.affine.import.html-files.tooltip": "這是一個實驗性功能,可能不完全完美,導入後可能會導致數據丟失。",
"com.affine.import.markdown-files": "Markdown 文件 (.md)",
+86 -10
View File
@@ -2947,7 +2947,7 @@ __metadata:
"@preact/signals-core": "npm:^1.8.0"
"@toeverything/theme": "npm:^1.1.16"
"@types/lodash-es": "npm:^4.17.12"
dompurify: "npm:^3.2.4"
dompurify: "npm:^3.3.0"
html2canvas: "npm:^1.4.1"
lit: "npm:^3.2.0"
lodash-es: "npm:^4.17.21"
@@ -3805,7 +3805,7 @@ __metadata:
"@types/lodash-es": "npm:^4.17.12"
"@types/mdast": "npm:^4.0.4"
bytes: "npm:^3.1.2"
dompurify: "npm:^3.2.4"
dompurify: "npm:^3.3.0"
fractional-indexing: "npm:^3.2.0"
lit: "npm:^3.2.0"
lodash-es: "npm:^4.17.21"
@@ -4065,6 +4065,7 @@ __metadata:
fflate: "npm:^0.8.2"
lit: "npm:^3.2.0"
lodash-es: "npm:^4.17.21"
mammoth: "npm:^1.11.0"
rxjs: "npm:^7.8.1"
zod: "npm:^3.23.8"
languageName: unknown
@@ -4445,7 +4446,7 @@ __metadata:
"@preact/signals-core": "npm:^1.8.0"
"@types/hast": "npm:^3.0.4"
"@types/lodash-es": "npm:^4.17.12"
dompurify: "npm:^3.2.4"
dompurify: "npm:^3.3.0"
fractional-indexing: "npm:^3.2.0"
lib0: "npm:^0.2.97"
lit: "npm:^3.2.0"
@@ -17021,7 +17022,7 @@ __metadata:
languageName: node
linkType: hard
"@xmldom/xmldom@npm:^0.8.8":
"@xmldom/xmldom@npm:^0.8.6, @xmldom/xmldom@npm:^0.8.8":
version: 0.8.10
resolution: "@xmldom/xmldom@npm:0.8.10"
checksum: 10/62400bc5e0e75b90650e33a5ceeb8d94829dd11f9b260962b71a784cd014ddccec3e603fe788af9c1e839fa4648d8c521ebd80d8b752878d3a40edabc9ce7ccf
@@ -17523,7 +17524,7 @@ __metadata:
languageName: node
linkType: hard
"argparse@npm:^1.0.7":
"argparse@npm:^1.0.7, argparse@npm:~1.0.3":
version: 1.0.10
resolution: "argparse@npm:1.0.10"
dependencies:
@@ -17993,6 +17994,13 @@ __metadata:
languageName: node
linkType: hard
"bluebird@npm:~3.4.0":
version: 3.4.7
resolution: "bluebird@npm:3.4.7"
checksum: 10/340e4d11d4b6a26d90371180effb4e500197c2943e5426472d6b6bffca0032a534226ad10255fc0e39c025bea197341c6b2a4258f8c0f18217c7b3a254c76c14
languageName: node
linkType: hard
"blueimp-md5@npm:^2.10.0":
version: 2.19.0
resolution: "blueimp-md5@npm:2.19.0"
@@ -20941,6 +20949,13 @@ __metadata:
languageName: node
linkType: hard
"dingbat-to-unicode@npm:^1.0.1":
version: 1.0.1
resolution: "dingbat-to-unicode@npm:1.0.1"
checksum: 10/2b3d956d2fcbfc258ca33b0b32b1b16f53547576eddfb81fb2adf3887f86e803a39a080121f1f232dfe5e398f26d35532ab2d539278378bb4aff7aca755edf09
languageName: node
linkType: hard
"dir-compare@npm:^3.0.0":
version: 3.3.0
resolution: "dir-compare@npm:3.3.0"
@@ -21067,15 +21082,15 @@ __metadata:
languageName: node
linkType: hard
"dompurify@npm:^3.2.4, dompurify@npm:^3.2.5":
version: 3.2.7
resolution: "dompurify@npm:3.2.7"
"dompurify@npm:^3.2.5, dompurify@npm:^3.3.0":
version: 3.3.0
resolution: "dompurify@npm:3.3.0"
dependencies:
"@types/trusted-types": "npm:^2.0.7"
dependenciesMeta:
"@types/trusted-types":
optional: true
checksum: 10/51b7866fb834ee62d6c415f41ece5ce11db7b463f60a822932a1f832573a40b98be7715550298690e7647988fbe086db1098bda9b10548b3166fc975eb9bd849
checksum: 10/d8782b10a0454344476936c91038d06c9450b3e3ada2ceb8f722525e6b54e64d847939b9f35bf385facd4139f0a2eaf7f5553efce351f8e9295620570875f002
languageName: node
linkType: hard
@@ -21175,6 +21190,15 @@ __metadata:
languageName: node
linkType: hard
"duck@npm:^0.1.12":
version: 0.1.12
resolution: "duck@npm:0.1.12"
dependencies:
underscore: "npm:^1.13.1"
checksum: 10/2b0df97a4022f8106f89bc8724ffbf4f7cba8a39d113389a5d475743ee406a2f140bd427f9cc3a0755374070816c2ebdd4c4c11789fa34150a3e4b1e79097c00
languageName: node
linkType: hard
"eastasianwidth@npm:^0.2.0":
version: 0.2.0
resolution: "eastasianwidth@npm:0.2.0"
@@ -25708,7 +25732,7 @@ __metadata:
languageName: node
linkType: hard
"jszip@npm:^3.10.1":
"jszip@npm:^3.10.1, jszip@npm:^3.7.1":
version: 3.10.1
resolution: "jszip@npm:3.10.1"
dependencies:
@@ -26613,6 +26637,17 @@ __metadata:
languageName: node
linkType: hard
"lop@npm:^0.4.2":
version: 0.4.2
resolution: "lop@npm:0.4.2"
dependencies:
duck: "npm:^0.1.12"
option: "npm:~0.2.1"
underscore: "npm:^1.13.1"
checksum: 10/a8663c2a9341e2acdb06a42a2780b924657e45e27fb4af2542c22c27852bf4b08429bdccd5c285586b9672d81bc9ebe88468c4dcc06c22537742baf59175b0e8
languageName: node
linkType: hard
"lottie-react@npm:^2.4.0":
version: 2.4.1
resolution: "lottie-react@npm:2.4.1"
@@ -26844,6 +26879,26 @@ __metadata:
languageName: node
linkType: hard
"mammoth@npm:^1.11.0":
version: 1.11.0
resolution: "mammoth@npm:1.11.0"
dependencies:
"@xmldom/xmldom": "npm:^0.8.6"
argparse: "npm:~1.0.3"
base64-js: "npm:^1.5.1"
bluebird: "npm:~3.4.0"
dingbat-to-unicode: "npm:^1.0.1"
jszip: "npm:^3.7.1"
lop: "npm:^0.4.2"
path-is-absolute: "npm:^1.0.0"
underscore: "npm:^1.13.1"
xmlbuilder: "npm:^10.0.0"
bin:
mammoth: bin/mammoth
checksum: 10/87a84141f26be4dea617327417a66126676ec6316fe1ea7268bd4ea17005977e53f7ce63844a8faf8c2f0b67f0be89bf207619427fd573a4b85051464df42426
languageName: node
linkType: hard
"map-age-cleaner@npm:^0.1.1":
version: 0.1.3
resolution: "map-age-cleaner@npm:0.1.3"
@@ -28941,6 +28996,13 @@ __metadata:
languageName: node
linkType: hard
"option@npm:~0.2.1":
version: 0.2.4
resolution: "option@npm:0.2.4"
checksum: 10/4380db94f0657ca1b1540681c93a12ef59847bfa482f85929e5bb717bab6ba2e570ef500a71540b7c002735332846a03eca59af362d1aae6cb3cd66dc0d02435
languageName: node
linkType: hard
"optionator@npm:^0.9.3":
version: 0.9.4
resolution: "optionator@npm:0.9.4"
@@ -34440,6 +34502,13 @@ __metadata:
languageName: node
linkType: hard
"underscore@npm:^1.13.1":
version: 1.13.7
resolution: "underscore@npm:1.13.7"
checksum: 10/1ce3368dbe73d1e99678fa5d341a9682bd27316032ad2de7883901918f0f5d50e80320ccc543f53c1862ab057a818abc560462b5f83578afe2dd8dd7f779766c
languageName: node
linkType: hard
"undici-types@npm:~6.21.0":
version: 6.21.0
resolution: "undici-types@npm:6.21.0"
@@ -35895,6 +35964,13 @@ __metadata:
languageName: node
linkType: hard
"xmlbuilder@npm:^10.0.0":
version: 10.1.1
resolution: "xmlbuilder@npm:10.1.1"
checksum: 10/f6fdfe87d221a7388718fa339e0d12323c0f62d6f9422df1d6af31bb1166ea40fb0b15a9c9e9b495e0c62da6c75ae7bbd26d171041fa6064e2d1840e0445c55f
languageName: node
linkType: hard
"xmlbuilder@npm:^15.1.1":
version: 15.1.1
resolution: "xmlbuilder@npm:15.1.1"