From 17ec76540b8552412357e66707d7bd8027fc18c5 Mon Sep 17 00:00:00 2001 From: Xun Sun Date: Sat, 15 Nov 2025 15:51:23 +0800 Subject: [PATCH] feat(editor): import docs from docx (#11774) Support importing .docx files, as mentioned in https://github.com/toeverything/AFFiNE/issues/10154#issuecomment-2655744757 It essentially uses mammoth to convert the docx to html, and then imports the html with the standard steps. ## Summary by CodeRabbit * **New Features** * Import Microsoft Word (.docx) files directly via the import dialog (creates new documents). * .docx added as a selectable file type in the file picker and import options. * **Localization** * Added localized labels and tooltips for DOCX import in English, Simplified Chinese, and Traditional Chinese. --------- Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com> Co-authored-by: DarkSky --- blocksuite/affine/blocks/root/package.json | 2 +- blocksuite/affine/shared/package.json | 2 +- .../affine/shared/src/adapters/html/html.ts | 4 +- .../affine/shared/src/utils/file/filesys.ts | 8 ++ .../affine/widgets/linked-doc/package.json | 1 + .../linked-doc/src/transformers/docx.ts | 47 +++++++++ .../linked-doc/src/transformers/index.ts | 1 + blocksuite/framework/std/package.json | 2 +- .../core/src/desktop/dialogs/import/index.tsx | 33 ++++++- packages/frontend/i18n/src/i18n.gen.ts | 8 ++ packages/frontend/i18n/src/resources/en.json | 2 + .../frontend/i18n/src/resources/zh-Hans.json | 2 + .../frontend/i18n/src/resources/zh-Hant.json | 2 + yarn.lock | 96 +++++++++++++++++-- 14 files changed, 195 insertions(+), 15 deletions(-) create mode 100644 blocksuite/affine/widgets/linked-doc/src/transformers/docx.ts diff --git a/blocksuite/affine/blocks/root/package.json b/blocksuite/affine/blocks/root/package.json index 63560065ff..29bd5dca3a 100644 --- a/blocksuite/affine/blocks/root/package.json +++ b/blocksuite/affine/blocks/root/package.json @@ -46,7 +46,7 @@ "@preact/signals-core": "^1.8.0", "@toeverything/theme": "^1.1.16", "@types/lodash-es": "^4.17.12", - "dompurify": "^3.2.4", + "dompurify": "^3.3.0", "html2canvas": "^1.4.1", "lit": "^3.2.0", "lodash-es": "^4.17.21", diff --git a/blocksuite/affine/shared/package.json b/blocksuite/affine/shared/package.json index 4cacac78e6..7fc2471531 100644 --- a/blocksuite/affine/shared/package.json +++ b/blocksuite/affine/shared/package.json @@ -24,7 +24,7 @@ "@types/lodash-es": "^4.17.12", "@types/mdast": "^4.0.4", "bytes": "^3.1.2", - "dompurify": "^3.2.4", + "dompurify": "^3.3.0", "fractional-indexing": "^3.2.0", "lit": "^3.2.0", "lodash-es": "^4.17.21", diff --git a/blocksuite/affine/shared/src/adapters/html/html.ts b/blocksuite/affine/shared/src/adapters/html/html.ts index c3314b28f5..e7fde15023 100644 --- a/blocksuite/affine/shared/src/adapters/html/html.ts +++ b/blocksuite/affine/shared/src/adapters/html/html.ts @@ -20,6 +20,7 @@ import { type ToDocSnapshotPayload, type Transformer, } from '@blocksuite/store'; +import DOMPurify from 'dompurify'; import type { Root } from 'hast'; import rehypeParse from 'rehype-parse'; import rehypeStringify from 'rehype-stringify'; @@ -297,7 +298,8 @@ export class HtmlAdapter extends BaseAdapter { override async toDocSnapshot( payload: ToDocSnapshotPayload ): Promise { - const htmlAst = this._htmlToAst(payload.file); + const sanitized = DOMPurify.sanitize(payload.file); + const htmlAst = this._htmlToAst(sanitized); const titleAst = HastUtils.querySelector(htmlAst, 'title'); const blockSnapshotRoot = { type: 'block', diff --git a/blocksuite/affine/shared/src/utils/file/filesys.ts b/blocksuite/affine/shared/src/utils/file/filesys.ts index 60715aac0d..0258545be6 100644 --- a/blocksuite/affine/shared/src/utils/file/filesys.ts +++ b/blocksuite/affine/shared/src/utils/file/filesys.ts @@ -92,6 +92,13 @@ const FileTypes: NonNullable = [ 'application/zip': ['.zip'], }, }, + { + description: 'Docx', + accept: { + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': + ['.docx'], + }, + }, { description: 'MindMap', accept: { @@ -111,6 +118,7 @@ type AcceptTypes = | 'Markdown' | 'Html' | 'Zip' + | 'Docx' | 'MindMap'; export async function openFilesWith( diff --git a/blocksuite/affine/widgets/linked-doc/package.json b/blocksuite/affine/widgets/linked-doc/package.json index 92c10b6864..1cc0ba8ed5 100644 --- a/blocksuite/affine/widgets/linked-doc/package.json +++ b/blocksuite/affine/widgets/linked-doc/package.json @@ -28,6 +28,7 @@ "fflate": "^0.8.2", "lit": "^3.2.0", "lodash-es": "^4.17.21", + "mammoth": "^1.11.0", "rxjs": "^7.8.1", "zod": "^3.23.8" }, diff --git a/blocksuite/affine/widgets/linked-doc/src/transformers/docx.ts b/blocksuite/affine/widgets/linked-doc/src/transformers/docx.ts new file mode 100644 index 0000000000..7cc67c752d --- /dev/null +++ b/blocksuite/affine/widgets/linked-doc/src/transformers/docx.ts @@ -0,0 +1,47 @@ +import type { ExtensionType, Schema, Workspace } from '@blocksuite/store'; +// @ts-ignore +import { convertToHtml } from 'mammoth/mammoth.browser'; + +import { HtmlTransformer } from './html'; + +type ImportDocxOptions = { + collection: Workspace; + schema: Schema; + imported: Blob; + extensions: ExtensionType[]; +}; + +/** + * Imports a .docx file into a doc. + * + * @param options - The import options. + * @param options.collection - The target doc collection. + * @param options.schema - The schema of the target doc collection. + * @param options.imported - The .docx file as a Blob. + * @returns A Promise that resolves to the ID of the newly created doc, or undefined if import fails. + */ +async function importDocx({ + collection, + schema, + imported, + extensions, +}: ImportDocxOptions) { + try { + const { value } = await convertToHtml({ + arrayBuffer: await imported.arrayBuffer(), + }); + return await HtmlTransformer.importHTMLToDoc({ + collection, + schema, + html: value, + extensions, + }); + } catch (e) { + console.error('Failed to import .docx file:', e); + return undefined; + } +} + +export const DocxTransformer = { + importDocx, +}; diff --git a/blocksuite/affine/widgets/linked-doc/src/transformers/index.ts b/blocksuite/affine/widgets/linked-doc/src/transformers/index.ts index a25831dbbe..e0766f80f3 100644 --- a/blocksuite/affine/widgets/linked-doc/src/transformers/index.ts +++ b/blocksuite/affine/widgets/linked-doc/src/transformers/index.ts @@ -1,3 +1,4 @@ +export { DocxTransformer } from './docx.js'; export { HtmlTransformer } from './html.js'; export { MarkdownTransformer } from './markdown.js'; export { NotionHtmlTransformer } from './notion-html.js'; diff --git a/blocksuite/framework/std/package.json b/blocksuite/framework/std/package.json index e9e5ebf4f8..1d58e26d11 100644 --- a/blocksuite/framework/std/package.json +++ b/blocksuite/framework/std/package.json @@ -19,7 +19,7 @@ "@preact/signals-core": "^1.8.0", "@types/hast": "^3.0.4", "@types/lodash-es": "^4.17.12", - "dompurify": "^3.2.4", + "dompurify": "^3.3.0", "fractional-indexing": "^3.2.0", "lib0": "^0.2.97", "lit": "^3.2.0", diff --git a/packages/frontend/core/src/desktop/dialogs/import/index.tsx b/packages/frontend/core/src/desktop/dialogs/import/index.tsx index 207ef06658..30045d7ef1 100644 --- a/packages/frontend/core/src/desktop/dialogs/import/index.tsx +++ b/packages/frontend/core/src/desktop/dialogs/import/index.tsx @@ -22,6 +22,7 @@ import track from '@affine/track'; import { openFilesWith } from '@blocksuite/affine/shared/utils'; import type { Workspace } from '@blocksuite/affine/store'; import { + DocxTransformer, HtmlTransformer, MarkdownTransformer, NotionHtmlTransformer, @@ -30,6 +31,7 @@ import { import { ExportToHtmlIcon, ExportToMarkdownIcon, + FileIcon, HelpIcon, NotionIcon, PageIcon, @@ -186,8 +188,9 @@ type ImportType = | 'notion' | 'snapshot' | 'html' + | 'docx' | 'dotaffinefile'; -type AcceptType = 'Markdown' | 'Zip' | 'Html' | 'Skip'; // Skip is used for dotaffinefile +type AcceptType = 'Markdown' | 'Zip' | 'Html' | 'Docx' | 'Skip'; // Skip is used for dotaffinefile type Status = 'idle' | 'importing' | 'success' | 'error'; type ImportResult = { docIds: string[]; @@ -262,6 +265,17 @@ const importOptions = [ testId: 'editor-option-menu-import-notion', type: 'notion' as ImportType, }, + { + key: 'docx', + label: 'com.affine.import.docx', + prefixIcon: , + suffixIcon: ( + + ), + suffixTooltip: 'com.affine.import.docx.tooltip', + testId: 'editor-option-menu-import-docx', + type: 'docx' as ImportType, + }, { key: 'snapshot', label: 'com.affine.import.snapshot', @@ -432,6 +446,23 @@ const importConfigs: Record = { }; }, }, + docx: { + fileOptions: { acceptType: 'Docx', multiple: false }, + importFunction: async (docCollection, file) => { + const files = Array.isArray(file) ? file : [file]; + const docIds: string[] = []; + for (const file of files) { + const docId = await DocxTransformer.importDocx({ + collection: docCollection, + schema: getAFFiNEWorkspaceSchema(), + imported: file, + extensions: getStoreManager().config.init().value.get('store'), + }); + if (docId) docIds.push(docId); + } + return { docIds }; + }, + }, snapshot: { fileOptions: { acceptType: 'Zip', multiple: false }, importFunction: async ( diff --git a/packages/frontend/i18n/src/i18n.gen.ts b/packages/frontend/i18n/src/i18n.gen.ts index 7b50d65376..7d24bf615a 100644 --- a/packages/frontend/i18n/src/i18n.gen.ts +++ b/packages/frontend/i18n/src/i18n.gen.ts @@ -2411,6 +2411,14 @@ export function useAFFiNEI18N(): { * `AFFiNE workspace data` */ ["com.affine.import.affine-workspace-data"](): string; + /** + * `Docx` + */ + ["com.affine.import.docx"](): string; + /** + * `Import your .docx file.` + */ + ["com.affine.import.docx.tooltip"](): string; /** * `HTML` */ diff --git a/packages/frontend/i18n/src/resources/en.json b/packages/frontend/i18n/src/resources/en.json index 43a100e64c..8c7dec9ee6 100644 --- a/packages/frontend/i18n/src/resources/en.json +++ b/packages/frontend/i18n/src/resources/en.json @@ -602,6 +602,8 @@ "com.affine.import-clipper.dialog.errorLoad": "Failed to load content, please try again.", "com.affine.import_file": "Support Markdown/Notion", "com.affine.import.affine-workspace-data": "AFFiNE workspace data", + "com.affine.import.docx": "Docx", + "com.affine.import.docx.tooltip": "Import your .docx file.", "com.affine.import.html-files": "HTML", "com.affine.import.html-files.tooltip": "This is an experimental feature that is not perfect and may cause your data to be missing after import.", "com.affine.import.markdown-files": "Markdown files (.md)", diff --git a/packages/frontend/i18n/src/resources/zh-Hans.json b/packages/frontend/i18n/src/resources/zh-Hans.json index 607321f2b6..209e48d21d 100644 --- a/packages/frontend/i18n/src/resources/zh-Hans.json +++ b/packages/frontend/i18n/src/resources/zh-Hans.json @@ -592,6 +592,8 @@ "com.affine.import-clipper.dialog.errorLoad": "读取内容失败,请重试。", "com.affine.import_file": "支持 Markdown/Notion", "com.affine.import.affine-workspace-data": "AFFiNE 工作区数据", + "com.affine.import.docx": "Docx", + "com.affine.import.docx.tooltip": "导入您的 Microsoft Office Word 文档。", "com.affine.import.html-files": "HTML", "com.affine.import.html-files.tooltip": "这是一个实验性功能,可能不完全完美,导入后可能会导致数据丢失。", "com.affine.import.markdown-files": "Markdown 文件 (.md)", diff --git a/packages/frontend/i18n/src/resources/zh-Hant.json b/packages/frontend/i18n/src/resources/zh-Hant.json index 319d556d5a..55898e81cc 100644 --- a/packages/frontend/i18n/src/resources/zh-Hant.json +++ b/packages/frontend/i18n/src/resources/zh-Hant.json @@ -592,6 +592,8 @@ "com.affine.import-clipper.dialog.errorLoad": "加載內容失敗,請重試。", "com.affine.import_file": "支援 Markdown/Notion", "com.affine.import.affine-workspace-data": "AFFiNE 工作區數據", + "com.affine.import.docx": "Docx", + "com.affine.import.docx.tooltip": "導入您的 Microsoft Office Word 文件。", "com.affine.import.html-files": "HTML", "com.affine.import.html-files.tooltip": "這是一個實驗性功能,可能不完全完美,導入後可能會導致數據丟失。", "com.affine.import.markdown-files": "Markdown 文件 (.md)", diff --git a/yarn.lock b/yarn.lock index 453fa87626..d9e547e79e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2947,7 +2947,7 @@ __metadata: "@preact/signals-core": "npm:^1.8.0" "@toeverything/theme": "npm:^1.1.16" "@types/lodash-es": "npm:^4.17.12" - dompurify: "npm:^3.2.4" + dompurify: "npm:^3.3.0" html2canvas: "npm:^1.4.1" lit: "npm:^3.2.0" lodash-es: "npm:^4.17.21" @@ -3805,7 +3805,7 @@ __metadata: "@types/lodash-es": "npm:^4.17.12" "@types/mdast": "npm:^4.0.4" bytes: "npm:^3.1.2" - dompurify: "npm:^3.2.4" + dompurify: "npm:^3.3.0" fractional-indexing: "npm:^3.2.0" lit: "npm:^3.2.0" lodash-es: "npm:^4.17.21" @@ -4065,6 +4065,7 @@ __metadata: fflate: "npm:^0.8.2" lit: "npm:^3.2.0" lodash-es: "npm:^4.17.21" + mammoth: "npm:^1.11.0" rxjs: "npm:^7.8.1" zod: "npm:^3.23.8" languageName: unknown @@ -4445,7 +4446,7 @@ __metadata: "@preact/signals-core": "npm:^1.8.0" "@types/hast": "npm:^3.0.4" "@types/lodash-es": "npm:^4.17.12" - dompurify: "npm:^3.2.4" + dompurify: "npm:^3.3.0" fractional-indexing: "npm:^3.2.0" lib0: "npm:^0.2.97" lit: "npm:^3.2.0" @@ -17021,7 +17022,7 @@ __metadata: languageName: node linkType: hard -"@xmldom/xmldom@npm:^0.8.8": +"@xmldom/xmldom@npm:^0.8.6, @xmldom/xmldom@npm:^0.8.8": version: 0.8.10 resolution: "@xmldom/xmldom@npm:0.8.10" checksum: 10/62400bc5e0e75b90650e33a5ceeb8d94829dd11f9b260962b71a784cd014ddccec3e603fe788af9c1e839fa4648d8c521ebd80d8b752878d3a40edabc9ce7ccf @@ -17523,7 +17524,7 @@ __metadata: languageName: node linkType: hard -"argparse@npm:^1.0.7": +"argparse@npm:^1.0.7, argparse@npm:~1.0.3": version: 1.0.10 resolution: "argparse@npm:1.0.10" dependencies: @@ -17993,6 +17994,13 @@ __metadata: languageName: node linkType: hard +"bluebird@npm:~3.4.0": + version: 3.4.7 + resolution: "bluebird@npm:3.4.7" + checksum: 10/340e4d11d4b6a26d90371180effb4e500197c2943e5426472d6b6bffca0032a534226ad10255fc0e39c025bea197341c6b2a4258f8c0f18217c7b3a254c76c14 + languageName: node + linkType: hard + "blueimp-md5@npm:^2.10.0": version: 2.19.0 resolution: "blueimp-md5@npm:2.19.0" @@ -20941,6 +20949,13 @@ __metadata: languageName: node linkType: hard +"dingbat-to-unicode@npm:^1.0.1": + version: 1.0.1 + resolution: "dingbat-to-unicode@npm:1.0.1" + checksum: 10/2b3d956d2fcbfc258ca33b0b32b1b16f53547576eddfb81fb2adf3887f86e803a39a080121f1f232dfe5e398f26d35532ab2d539278378bb4aff7aca755edf09 + languageName: node + linkType: hard + "dir-compare@npm:^3.0.0": version: 3.3.0 resolution: "dir-compare@npm:3.3.0" @@ -21067,15 +21082,15 @@ __metadata: languageName: node linkType: hard -"dompurify@npm:^3.2.4, dompurify@npm:^3.2.5": - version: 3.2.7 - resolution: "dompurify@npm:3.2.7" +"dompurify@npm:^3.2.5, dompurify@npm:^3.3.0": + version: 3.3.0 + resolution: "dompurify@npm:3.3.0" dependencies: "@types/trusted-types": "npm:^2.0.7" dependenciesMeta: "@types/trusted-types": optional: true - checksum: 10/51b7866fb834ee62d6c415f41ece5ce11db7b463f60a822932a1f832573a40b98be7715550298690e7647988fbe086db1098bda9b10548b3166fc975eb9bd849 + checksum: 10/d8782b10a0454344476936c91038d06c9450b3e3ada2ceb8f722525e6b54e64d847939b9f35bf385facd4139f0a2eaf7f5553efce351f8e9295620570875f002 languageName: node linkType: hard @@ -21175,6 +21190,15 @@ __metadata: languageName: node linkType: hard +"duck@npm:^0.1.12": + version: 0.1.12 + resolution: "duck@npm:0.1.12" + dependencies: + underscore: "npm:^1.13.1" + checksum: 10/2b0df97a4022f8106f89bc8724ffbf4f7cba8a39d113389a5d475743ee406a2f140bd427f9cc3a0755374070816c2ebdd4c4c11789fa34150a3e4b1e79097c00 + languageName: node + linkType: hard + "eastasianwidth@npm:^0.2.0": version: 0.2.0 resolution: "eastasianwidth@npm:0.2.0" @@ -25708,7 +25732,7 @@ __metadata: languageName: node linkType: hard -"jszip@npm:^3.10.1": +"jszip@npm:^3.10.1, jszip@npm:^3.7.1": version: 3.10.1 resolution: "jszip@npm:3.10.1" dependencies: @@ -26613,6 +26637,17 @@ __metadata: languageName: node linkType: hard +"lop@npm:^0.4.2": + version: 0.4.2 + resolution: "lop@npm:0.4.2" + dependencies: + duck: "npm:^0.1.12" + option: "npm:~0.2.1" + underscore: "npm:^1.13.1" + checksum: 10/a8663c2a9341e2acdb06a42a2780b924657e45e27fb4af2542c22c27852bf4b08429bdccd5c285586b9672d81bc9ebe88468c4dcc06c22537742baf59175b0e8 + languageName: node + linkType: hard + "lottie-react@npm:^2.4.0": version: 2.4.1 resolution: "lottie-react@npm:2.4.1" @@ -26844,6 +26879,26 @@ __metadata: languageName: node linkType: hard +"mammoth@npm:^1.11.0": + version: 1.11.0 + resolution: "mammoth@npm:1.11.0" + dependencies: + "@xmldom/xmldom": "npm:^0.8.6" + argparse: "npm:~1.0.3" + base64-js: "npm:^1.5.1" + bluebird: "npm:~3.4.0" + dingbat-to-unicode: "npm:^1.0.1" + jszip: "npm:^3.7.1" + lop: "npm:^0.4.2" + path-is-absolute: "npm:^1.0.0" + underscore: "npm:^1.13.1" + xmlbuilder: "npm:^10.0.0" + bin: + mammoth: bin/mammoth + checksum: 10/87a84141f26be4dea617327417a66126676ec6316fe1ea7268bd4ea17005977e53f7ce63844a8faf8c2f0b67f0be89bf207619427fd573a4b85051464df42426 + languageName: node + linkType: hard + "map-age-cleaner@npm:^0.1.1": version: 0.1.3 resolution: "map-age-cleaner@npm:0.1.3" @@ -28941,6 +28996,13 @@ __metadata: languageName: node linkType: hard +"option@npm:~0.2.1": + version: 0.2.4 + resolution: "option@npm:0.2.4" + checksum: 10/4380db94f0657ca1b1540681c93a12ef59847bfa482f85929e5bb717bab6ba2e570ef500a71540b7c002735332846a03eca59af362d1aae6cb3cd66dc0d02435 + languageName: node + linkType: hard + "optionator@npm:^0.9.3": version: 0.9.4 resolution: "optionator@npm:0.9.4" @@ -34440,6 +34502,13 @@ __metadata: languageName: node linkType: hard +"underscore@npm:^1.13.1": + version: 1.13.7 + resolution: "underscore@npm:1.13.7" + checksum: 10/1ce3368dbe73d1e99678fa5d341a9682bd27316032ad2de7883901918f0f5d50e80320ccc543f53c1862ab057a818abc560462b5f83578afe2dd8dd7f779766c + languageName: node + linkType: hard + "undici-types@npm:~6.21.0": version: 6.21.0 resolution: "undici-types@npm:6.21.0" @@ -35895,6 +35964,13 @@ __metadata: languageName: node linkType: hard +"xmlbuilder@npm:^10.0.0": + version: 10.1.1 + resolution: "xmlbuilder@npm:10.1.1" + checksum: 10/f6fdfe87d221a7388718fa339e0d12323c0f62d6f9422df1d6af31bb1166ea40fb0b15a9c9e9b495e0c62da6c75ae7bbd26d171041fa6064e2d1840e0445c55f + languageName: node + linkType: hard + "xmlbuilder@npm:^15.1.1": version: 15.1.1 resolution: "xmlbuilder@npm:15.1.1"