From a0613b6306e492227c58fb1fca217cf64bf7cf82 Mon Sep 17 00:00:00 2001
From: Martijn Smit
Date: Tue, 21 Oct 2025 19:34:23 +0200
Subject: [PATCH] feat(core): enhance Notion import with folder hierarchy and page icons (#13692)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hi 👋

Love what you're doing with AFFiNE; I found my Notion replacement in this app.

Coming from Notion and importing my workspaces, I saw the need to improve the import functionality to replicate the folder hierarchy from Notion into the AFFiNE folder structure in the `Organize` section. There are a few issues (e.g. #12961 and #13629) from people who expected the same behavior. This should make it a lot easier for people to move over.

### Technical description

Improve Notion import functionality to preserve organizational structure and page icons from exports:

- Add folder hierarchy preservation during Notion import
- Extract page icons (emoji and image) from Notion HTML and set them on the imported pages (only emoji icons are applied so far; image icons are extracted but skipped)
- Integrate with OrganizeService to recreate folder structure
- Use ExplorerIconService to apply page icons to imported documents
- Tested using production workspace exports from Notion with several thousand pages and multiple hierarchy levels

## Summary by CodeRabbit

- New Features
  - Imports from Notion/HTML now recreate folder hierarchies and link imported pages into that structure.
  - Page icons (emoji and supported images) are extracted and attached to imported pages.
  - Import results now include the created root folder for quick access.
- Improvements
  - Import flows integrate folder creation and icon assignment with non-blocking error handling.
- Bug Fixes
  - Avoids including undefined page IDs in successful imports.
--------- Co-authored-by: DarkSky Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com> --- .../linked-doc/src/import-doc/import-doc.ts | 2 +- .../src/transformers/notion-html.ts | 236 ++++++++++++++++- .../core/src/desktop/dialogs/import/index.tsx | 239 +++++++++++++++++- 3 files changed, 462 insertions(+), 15 deletions(-) diff --git a/blocksuite/affine/widgets/linked-doc/src/import-doc/import-doc.ts b/blocksuite/affine/widgets/linked-doc/src/import-doc/import-doc.ts index a43aa15477..0de99711aa 100644 --- a/blocksuite/affine/widgets/linked-doc/src/import-doc/import-doc.ts +++ b/blocksuite/affine/widgets/linked-doc/src/import-doc/import-doc.ts @@ -134,7 +134,7 @@ export class ImportDoc extends WithDisposable(LitElement) { ); return; } - this._onImportSuccess([entryId], { + this._onImportSuccess(entryId ? [entryId] : [], { isWorkspaceFile, importedCount: pageIds.length, }); diff --git a/blocksuite/affine/widgets/linked-doc/src/transformers/notion-html.ts b/blocksuite/affine/widgets/linked-doc/src/transformers/notion-html.ts index b5b36b20f9..1febb99812 100644 --- a/blocksuite/affine/widgets/linked-doc/src/transformers/notion-html.ts +++ b/blocksuite/affine/widgets/linked-doc/src/transformers/notion-html.ts @@ -21,6 +21,28 @@ type ImportNotionZipOptions = { extensions: ExtensionType[]; }; +type PageIcon = { + type: 'emoji' | 'image'; + content: string; // emoji unicode or image URL/data +}; + +type FolderHierarchy = { + name: string; + path: string; + children: Map; + pageId?: string; + parentPath?: string; + icon?: PageIcon; +}; + +type ImportNotionZipResult = { + entryId: string | undefined; + pageIds: string[]; + isWorkspaceFile: boolean; + hasMarkdown: boolean; + folderHierarchy?: FolderHierarchy; +}; + function getProvider(extensions: ExtensionType[]) { const container = new Container(); extensions.forEach(ext => { @@ -29,6 +51,197 @@ function getProvider(extensions: ExtensionType[]) { return container.provider(); } +function 
parseFolderPath(filePath: string): { + folderParts: string[]; + fileName: string; +} { + const parts = filePath.split('/'); + const fileName = parts.pop() || ''; + return { folderParts: parts.filter(part => part.length > 0), fileName }; +} + +function extractPageIcon(doc: Document): PageIcon | undefined { + // Look for Notion page icon in the HTML + // Notion export format:
<div class="page-header-icon"><span class="icon">✅</span></div>
+ + console.log('=== Extracting page icon ==='); + + // Check if there's a head section with title for debugging + const headTitle = doc.querySelector('head title'); + if (headTitle) { + console.log('Page title from head:', headTitle.textContent); + } + + // Look for the exact Notion export structure: .page-header-icon .icon + const notionIconSpan = doc.querySelector('.page-header-icon .icon'); + if (notionIconSpan && notionIconSpan.textContent) { + const iconContent = notionIconSpan.textContent.trim(); + console.log('Found Notion icon (.page-header-icon .icon):', iconContent); + if (/\p{Emoji}/u.test(iconContent)) { + return { + type: 'emoji', + content: iconContent, + }; + } + } + + // Look for page header area for debugging + const pageHeader = doc.querySelector('.page-header-icon'); + if (pageHeader) { + console.log( + 'Found .page-header-icon:', + pageHeader.outerHTML.substring(0, 300) + '...' + ); + } + + // Fallback: try to find emoji icons with older selectors + const emojiIcon = doc.querySelector('.page-header-icon .notion-emoji'); + if (emojiIcon && emojiIcon.textContent) { + console.log( + 'Found emoji icon (.page-header-icon .notion-emoji):', + emojiIcon.textContent + ); + return { + type: 'emoji', + content: emojiIcon.textContent.trim(), + }; + } + + // Try alternative emoji selectors + const altEmojiIcon = doc.querySelector('[role="img"][aria-label]'); + if ( + altEmojiIcon && + altEmojiIcon.textContent && + /\p{Emoji}/u.test(altEmojiIcon.textContent) + ) { + console.log( + 'Found emoji icon ([role="img"][aria-label]):', + altEmojiIcon.textContent + ); + return { + type: 'emoji', + content: altEmojiIcon.textContent.trim(), + }; + } + + // Look for image icons in the page header + const imageIcon = doc.querySelector('.page-header-icon img'); + if (imageIcon) { + const src = imageIcon.getAttribute('src'); + console.log('Found image icon (.page-header-icon img):', src); + if (src) { + return { + type: 'image', + content: src, + }; + } + } + + // 
Fallback: Look for any span with emoji class "icon" in page header area + const iconSpans = doc.querySelectorAll('span.icon'); + for (const span of iconSpans) { + if (span.textContent && /\p{Emoji}/u.test(span.textContent.trim())) { + const parent = span.parentElement; + console.log( + 'Found emoji in span.icon:', + span.textContent, + 'parent classes:', + parent?.className + ); + // Check if this is in a page header context + if ( + parent && + (parent.classList.contains('page-header-icon') || + parent.closest('.page-header-icon')) + ) { + console.log( + 'Using emoji from span.icon in page header:', + span.textContent + ); + return { + type: 'emoji', + content: span.textContent.trim(), + }; + } + } + } + + // Fallback: Try to find icons in the page title area that might contain emoji + const pageTitle = doc.querySelector('.page-title, h1'); + if (pageTitle && pageTitle.textContent) { + console.log('Page title element found:', pageTitle.textContent); + const text = pageTitle.textContent.trim(); + // Check if the title starts with an emoji + const emojiMatch = text.match(/^(\p{Emoji}+)/u); + if (emojiMatch) { + console.log('Found emoji in title:', emojiMatch[1]); + return { + type: 'emoji', + content: emojiMatch[1], + }; + } + } + + console.log('No page icon found'); + return undefined; +} + +function buildFolderHierarchy( + pagePaths: Array<{ path: string; pageId: string; icon?: PageIcon }> +): FolderHierarchy { + const root: FolderHierarchy = { + name: '', + path: '', + children: new Map(), + }; + + for (const { path, pageId, icon } of pagePaths) { + const { folderParts, fileName } = parseFolderPath(path); + let current = root; + let currentPath = ''; + + // Navigate/create folder structure + for (const folderName of folderParts) { + const parentPath = currentPath; + currentPath = currentPath ? 
`${currentPath}/${folderName}` : folderName; + + if (!current.children.has(folderName)) { + current.children.set(folderName, { + name: folderName, + path: currentPath, + parentPath: parentPath || undefined, + children: new Map(), + }); + } + current = current.children.get(folderName)!; + } + + // If this is a page file, associate it with the current folder + if (fileName.endsWith('.html') && !fileName.startsWith('index.html')) { + const pageName = fileName.replace(/\.html$/, ''); + if (!current.children.has(pageName)) { + current.children.set(pageName, { + name: pageName, + path: path, + parentPath: current.path || undefined, + children: new Map(), + pageId: pageId, + icon: icon, + }); + } else { + // Update existing entry with pageId and icon + const existingPage = current.children.get(pageName)!; + existingPage.pageId = pageId; + if (icon) { + existingPage.icon = icon; + } + } + } + } + + return root; +} + /** * Imports a Notion zip file into the BlockSuite collection. * @@ -42,18 +255,24 @@ function getProvider(extensions: ExtensionType[]) { * - pageIds: An array of imported page IDs. * - isWorkspaceFile: Whether the imported file is a workspace file. * - hasMarkdown: Whether the zip contains markdown files. + * - folderHierarchy: The parsed folder hierarchy from the Notion export. 
*/ async function importNotionZip({ collection, schema, imported, extensions, -}: ImportNotionZipOptions) { +}: ImportNotionZipOptions): Promise { const provider = getProvider(extensions); const pageIds: string[] = []; let isWorkspaceFile = false; let hasMarkdown = false; let entryId: string | undefined; + const pagePathsWithIds: Array<{ + path: string; + pageId: string; + icon?: PageIcon; + }> = []; const parseZipFile = async (path: File | Blob) => { const unzip = new Unzip(); await unzip.load(path); @@ -80,6 +299,8 @@ async function importNotionZip({ isWorkspaceFile = true; continue; } + + let pageIcon: PageIcon | undefined; if (lastSplitIndex !== -1) { const text = await content.text(); const doc = new DOMParser().parseFromString(text, 'text/html'); @@ -88,7 +309,10 @@ async function importNotionZip({ // Skip empty pages continue; } + // Extract page icon from the HTML + pageIcon = extractPageIcon(doc); } + const id = collection.idGenerator(); const splitPath = path.split('/'); while (splitPath.length > 0) { @@ -96,6 +320,7 @@ async function importNotionZip({ splitPath.shift(); } pagePaths.push(path); + pagePathsWithIds.push({ path, pageId: id, icon: pageIcon }); if (entryId === undefined && lastSplitIndex === -1) { entryId = id; } @@ -166,7 +391,14 @@ async function importNotionZip({ const allPromises = await parseZipFile(imported); await Promise.all(allPromises.flat()); entryId = entryId ?? pageIds[0]; - return { entryId, pageIds, isWorkspaceFile, hasMarkdown }; + + // Build folder hierarchy from collected paths + const folderHierarchy = + pagePathsWithIds.length > 0 + ? 
buildFolderHierarchy(pagePathsWithIds) + : undefined; + + return { entryId, pageIds, isWorkspaceFile, hasMarkdown, folderHierarchy }; } export const NotionHtmlTransformer = { diff --git a/packages/frontend/core/src/desktop/dialogs/import/index.tsx b/packages/frontend/core/src/desktop/dialogs/import/index.tsx index ec46f4c421..207ef06658 100644 --- a/packages/frontend/core/src/desktop/dialogs/import/index.tsx +++ b/packages/frontend/core/src/desktop/dialogs/import/index.tsx @@ -1,4 +1,5 @@ import { Button, IconButton, Modal } from '@affine/component'; +import { IconType } from '@affine/component'; import { getStoreManager } from '@affine/core/blocksuite/manager/store'; import { useAsyncCallback } from '@affine/core/components/hooks/affine-async-hooks'; import { useNavigateHelper } from '@affine/core/components/hooks/use-navigate-helper'; @@ -7,6 +8,8 @@ import { GlobalDialogService, type WORKSPACE_DIALOG_SCHEMA, } from '@affine/core/modules/dialogs'; +import { ExplorerIconService } from '@affine/core/modules/explorer-icon/services/explorer-icon'; +import { OrganizeService } from '@affine/core/modules/organize'; import { UrlService } from '@affine/core/modules/url'; import { getAFFiNEWorkspaceSchema, @@ -48,6 +51,135 @@ import * as style from './styles.css'; const logger = new DebugLogger('import'); +type NotionPageIcon = { + type: 'emoji' | 'image'; + content: string; // emoji unicode or image URL/data +}; + +type FolderHierarchy = { + name: string; + path: string; + children: Map; + pageId?: string; + parentPath?: string; + icon?: NotionPageIcon; +}; + +// Helper function to create folder structure using OrganizeService +function createFolderStructure( + organizeService: OrganizeService, + hierarchy: FolderHierarchy, + parentFolderId: string | null = null, + explorerIconService?: ExplorerIconService +): { + folderId: string | null; + docLinks: Array<{ folderId: string; docId: string }>; +} { + const docLinks: Array<{ folderId: string; docId: string }> = []; + const 
rootFolder = organizeService.folderTree.rootFolder; + + function processHierarchyNode( + node: FolderHierarchy, + currentParentId: string | null + ): string | null { + let currentFolderId = currentParentId; + + // If this node represents a folder (has children but no pageId), create it + if (node.children.size > 0 && !node.pageId && node.name) { + const parent = currentParentId + ? organizeService.folderTree.folderNode$(currentParentId).value + : rootFolder; + + if (parent) { + const index = parent.indexAt('after'); + currentFolderId = parent.createFolder(node.name, index); + } + } + + // Process all children + for (const child of node.children.values()) { + if (child.pageId) { + // This is a document, link it to the current folder + if (currentFolderId) { + docLinks.push({ folderId: currentFolderId, docId: child.pageId }); + } + + // Set icon for the document if available + if (child.icon && explorerIconService) { + logger.debug('=== Setting icon for document ==='); + logger.debug('Document ID:', child.pageId); + logger.debug('Icon data:', child.icon); + + try { + let iconData; + if (child.icon.type === 'emoji') { + iconData = { + type: IconType.Emoji as const, + unicode: child.icon.content, + }; + logger.debug('Created emoji icon data:', iconData); + } else if (child.icon.type === 'image') { + // For image icons, we'd need to handle blob conversion + // For now, let's skip image icons or convert them to default + // This could be enhanced later to download and convert images to blobs + logger.debug( + 'Skipping image icon (not implemented):', + child.icon.content + ); + iconData = undefined; + } + + if (iconData) { + logger.debug('Calling explorerIconService.setIcon with:', { + where: 'doc', + id: child.pageId, + icon: iconData, + }); + explorerIconService.setIcon({ + where: 'doc', + id: child.pageId, + icon: iconData, + }); + logger.debug('Icon set successfully for document:', child.pageId); + } else { + logger.debug('No valid icon data to set'); + } + } catch 
(error) { + logger.error( + 'Error setting icon for document:', + child.pageId, + error + ); + logger.warn( + 'Failed to set icon for document:', + child.pageId, + error + ); + } + } else { + if (!child.icon) { + logger.debug('No icon found for document:', child.pageId); + } + if (!explorerIconService) { + logger.debug( + 'ExplorerIconService not available for document:', + child.pageId + ); + } + } + } else if (child.children.size > 0) { + // This is a subfolder, process it recursively + processHierarchyNode(child, currentFolderId); + } + } + + return currentFolderId; + } + + const rootFolderId = processHierarchyNode(hierarchy, parentFolderId); + return { folderId: rootFolderId, docLinks }; +} + type ImportType = | 'markdown' | 'markdownZip' @@ -61,6 +193,7 @@ type ImportResult = { docIds: string[]; entryId?: string; isWorkspaceFile?: boolean; + rootFolderId?: string; }; type ImportConfig = { @@ -68,7 +201,9 @@ type ImportConfig = { importFunction: ( docCollection: Workspace, files: File[], - handleImportAffineFile: () => Promise + handleImportAffineFile: () => Promise, + organizeService?: OrganizeService, + explorerIconService?: ExplorerIconService ) => Promise; }; @@ -160,7 +295,13 @@ const importOptions = [ const importConfigs: Record = { markdown: { fileOptions: { acceptType: 'Markdown', multiple: true }, - importFunction: async (docCollection, files) => { + importFunction: async ( + docCollection, + files, + _handleImportAffineFile, + _organizeService, + _explorerIconService + ) => { const docIds: string[] = []; for (const file of files) { const text = await file.text(); @@ -181,7 +322,13 @@ const importConfigs: Record = { }, markdownZip: { fileOptions: { acceptType: 'Zip', multiple: false }, - importFunction: async (docCollection, files) => { + importFunction: async ( + docCollection, + files, + _handleImportAffineFile, + _organizeService, + _explorerIconService + ) => { const file = files.length === 1 ? 
files[0] : null; if (!file) { throw new Error('Expected a single zip file for markdownZip import'); @@ -199,7 +346,13 @@ const importConfigs: Record = { }, html: { fileOptions: { acceptType: 'Html', multiple: true }, - importFunction: async (docCollection, files) => { + importFunction: async ( + docCollection, + files, + _handleImportAffineFile, + _organizeService, + _explorerIconService + ) => { const docIds: string[] = []; for (const file of files) { const text = await file.text(); @@ -220,28 +373,74 @@ const importConfigs: Record = { }, notion: { fileOptions: { acceptType: 'Zip', multiple: false }, - importFunction: async (docCollection, files) => { + importFunction: async ( + docCollection, + files, + _handleImportAffineFile, + organizeService, + explorerIconService + ) => { const file = files.length === 1 ? files[0] : null; if (!file) { throw new Error('Expected a single zip file for notion import'); } - const { entryId, pageIds, isWorkspaceFile } = + const { entryId, pageIds, isWorkspaceFile, folderHierarchy } = await NotionHtmlTransformer.importNotionZip({ collection: docCollection, schema: getAFFiNEWorkspaceSchema(), imported: file, extensions: getStoreManager().config.init().value.get('store'), }); + + let rootFolderId: string | undefined; + + // Create folder structure if hierarchy exists and OrganizeService is available + if ( + folderHierarchy && + organizeService && + folderHierarchy.children.size > 0 + ) { + try { + const { folderId, docLinks } = createFolderStructure( + organizeService, + folderHierarchy, + null, + explorerIconService + ); + rootFolderId = folderId || undefined; + + // Create links for all documents to their respective folders + for (const { folderId, docId } of docLinks) { + const folder = + organizeService.folderTree.folderNode$(folderId).value; + if (folder) { + const index = folder.indexAt('after'); + folder.createLink('doc', docId, index); + } + } + } catch (error) { + logger.warn('Failed to create folder structure:', error); + 
// Continue with import even if folder creation fails + } + } + return { docIds: pageIds, entryId, isWorkspaceFile, + rootFolderId, }; }, }, snapshot: { fileOptions: { acceptType: 'Zip', multiple: false }, - importFunction: async (docCollection, files) => { + importFunction: async ( + docCollection, + files, + _handleImportAffineFile, + _organizeService, + _explorerIconService + ) => { const file = files.length === 1 ? files[0] : null; if (!file) { throw new Error('Expected a single zip file for snapshot import'); @@ -263,7 +462,13 @@ const importConfigs: Record = { }, dotaffinefile: { fileOptions: { acceptType: 'Skip', multiple: false }, - importFunction: async (_, __, handleImportAffineFile) => { + importFunction: async ( + _, + __, + handleImportAffineFile, + _organizeService, + _explorerIconService + ) => { await handleImportAffineFile(); return { docIds: [], @@ -441,6 +646,8 @@ export const ImportDialog = ({ const [importResult, setImportResult] = useState(null); const workspace = useService(WorkspaceService).workspace; const docCollection = workspace.docCollection; + const organizeService = useService(OrganizeService); + const explorerIconService = useService(ExplorerIconService); const globalDialogService = useService(GlobalDialogService); @@ -514,14 +721,16 @@ export const ImportDialog = ({ }); } - const { docIds, entryId, isWorkspaceFile } = + const { docIds, entryId, isWorkspaceFile, rootFolderId } = await importConfig.importFunction( docCollection, files, - handleImportAffineFile + handleImportAffineFile, + organizeService, + explorerIconService ); - setImportResult({ docIds, entryId, isWorkspaceFile }); + setImportResult({ docIds, entryId, isWorkspaceFile, rootFolderId }); setStatus('success'); track.$.importModal.$.import({ type, @@ -546,7 +755,13 @@ export const ImportDialog = ({ logger.error('Failed to import', error); } }, - [docCollection, handleImportAffineFile, t] + [ + docCollection, + explorerIconService, + handleImportAffineFile, + 
organizeService, + t, + ] ); const handleComplete = useCallback(() => {