feat(editor): add obsidian vault import support (#14593)

fix #14592 ### Description > 🤖 **Note:** The code in this Pull Request was developed with the assistance of AI, but has been thoroughly reviewed and manually tested. > I noticed there's a check when opening an issue that asks _"Is your content generated by AI?"_, so I mention it here in case it's a deal breaker. If so, I understand and you can close the PR; I just wanted to share this in case it's useful anyway. This PR introduces **Obsidian Vault Import Support** to AFFiNE. Previously, users migrating from Obsidian had to rely on the generic Markdown importer, which often resulted in broken cross-links, missing directory structures, and metadata conflicts because Obsidian relies heavily on proprietary structures not supported by standard Markdown. This completely new feature makes migrating to AFFiNE easy. **Key Features & Implementations:** 1. **Vault (Directory) Selection** - Utilizes the `openDirectory` blocksuite utility in the import modal to allow users to select an entire folder directly from their filesystem, maintaining file context rather than forcing `.zip` uploads. 2. **Wikilink Resolution (Two-Pass Import)** - Restructured the `importObsidianVault` process into a two-pass architecture. - **Pass 1:** Discovers all files, assigns new AFFiNE document IDs, and maps them efficiently (by title, alias, and filename) into a high-performance hash map. - **Pass 2:** Processes the generic markdown AST and correctly maps custom `[[wikilinks]]` to the actual pre-registered AFFiNE blocksuite document IDs via `obsidianWikilinkToDeltaMatcher`. - Safely strips leading emojis from wikilink aliases to prevent duplicated page icons rendering mid-sentence. 3. **Emoji Metadata & State Fixes** - Implemented an aggressive, single-pass RegExp to extract multiple leading/combining emojis (`Emoji_Presentation` / `\ufe0f`) from H1 headers and Frontmatter. Emojis are assigned specifically to the page icon metadata property and cleanly stripped from the visual document title. 
- Fixed a core mutation bug where the loop iterating over existing `docMetas` was aggressively overwriting newly minted IDs for the current import batch. This fully resolves the issue where imported pages (especially re-imports) were incorrectly flagged as `trashed`. - Enforces explicit `trash: false` patch instructions. 4. **Syntax Conversion** - Implemented conversion of Obsidian-style Callouts (`> [!NOTE] Title`) into native AFFiNE block formats (`> 💡 **Title**`). - Hardened the `blockquote` parser so that nested structures (like `> - list items`) are fully preserved instead of discarded. ### UI Changes - Updated the Import Modal to include the "Import Obsidian Vault" flow utilizing the native filesystem directory picker. - Regenerated and synced `i18n-completenesses.json` correctly up to 100% across all supported locales for the new modal string additions. ### Testing Instructions 1. Navigate to the Workspace sidebar and click "Import". 2. Select "Obsidian" and use the directory picker to define a comprehensive Vault folder. 3. Validate that cross-links between documents automatically resolve to their specific AFFiNE instances. 4. Validate documents containing leading Emojis display exactly one Emoji (in the page icon area), and none duplicated in the actual title header. 5. Validate Callouts are rendered cleanly and correctly, and no documents are incorrectly marked as "Trash".  ## Summary by CodeRabbit * **New Features** * Import Obsidian vaults with wikilink resolution, emoji/title preservation, asset handling, and automatic document creation. * Folder-based imports via a Directory Picker (with hidden-input fallback) integrated into the import dialog. * **Localization** * Added Obsidian import label and tooltip translations. * **Tests** * Added end-to-end tests validating Obsidian vault import and asset handling.  --------- Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com> Co-authored-by: DarkSky <darksky2048@gmail.com>
2026-03-24 16:18:39 +08:00 · 2026-03-16 10:49:17 -06:00
parent 8406f9656e
commit 0b1a44863f
21 changed files with 1515 additions and 160 deletions
--- a/blocksuite/affine/widgets/linked-doc/src/transformers/markdown.ts
+++ b/blocksuite/affine/widgets/linked-doc/src/transformers/markdown.ts
@@ -21,8 +21,11 @@ import { extMimeMap, Transformer } from '@blocksuite/store';
 import type { AssetMap, ImportedFileEntry, PathBlobIdMap } from './type.js';
 import { createAssetsArchive, download, parseMatter, Unzip } from './utils.js';

-type ParsedFrontmatterMeta = Partial<
-  Pick<DocMeta, 'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite'>
+export type ParsedFrontmatterMeta = Partial<
+  Pick<
+    DocMeta,
+    'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite' | 'trash'
+  >
 >;

 const FRONTMATTER_KEYS = {
@@ -150,11 +153,18 @@ function buildMetaFromFrontmatter(
      }
      continue;
    }
+    if (FRONTMATTER_KEYS.trash.includes(key)) {
+      const trash = parseBoolean(value);
+      if (trash !== undefined) {
+        meta.trash = trash;
+      }
+      continue;
+    }
  }
  return meta;
 }

-function parseFrontmatter(markdown: string): {
+export function parseFrontmatter(markdown: string): {
  content: string;
  meta: ParsedFrontmatterMeta;
 } {
@@ -176,7 +186,7 @@ function parseFrontmatter(markdown: string): {
  }
 }

-function applyMetaPatch(
+export function applyMetaPatch(
  collection: Workspace,
  docId: string,
  meta: ParsedFrontmatterMeta
@@ -187,13 +197,14 @@ function applyMetaPatch(
  if (meta.updatedDate !== undefined) metaPatch.updatedDate = meta.updatedDate;
  if (meta.tags) metaPatch.tags = meta.tags;
  if (meta.favorite !== undefined) metaPatch.favorite = meta.favorite;
+  if (meta.trash !== undefined) metaPatch.trash = meta.trash;

  if (Object.keys(metaPatch).length) {
    collection.meta.setDocMeta(docId, metaPatch);
  }
 }

-function getProvider(extensions: ExtensionType[]) {
+export function getProvider(extensions: ExtensionType[]) {
  const container = new Container();
  extensions.forEach(ext => {
    ext.setup(container);
@@ -223,6 +234,103 @@ type ImportMarkdownZipOptions = {
  extensions: ExtensionType[];
 };

+/**
+ * Returns true when `path` contains `__MACOSX` or `.DS_Store` anywhere (plain substring match); importers skip such entries.
+ */
+export function isSystemImportPath(path: string) {
+  return path.includes('__MACOSX') || path.includes('.DS_Store');
+}
+
+/**
+ * Builds the doc CRUD object that import jobs pass to `Transformer` as `docCRUD`, backed by `collection`.
+ */
+export function createCollectionDocCRUD(collection: Workspace) {
+  return {
+    create: (id: string) => collection.createDoc(id).getStore({ id }), // create the doc, return its store
+    get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null, // null when no doc has this id
+    delete: (id: string) => collection.removeDoc(id),
+  };
+}
+
+type CreateMarkdownImportJobOptions = {
+  collection: Workspace; // supplies blobSync, the doc CRUD bridge, and the id given to docLinkBaseURLMiddleware
+  schema: Schema; // forwarded to the Transformer
+  preferredTitle?: string; // forwarded to fileNameMiddleware
+  fullPath?: string; // when truthy, filePathMiddleware(fullPath) is appended
+};
+
+/**
+ * Builds a `Transformer` for markdown import that uses `collection` for blob sync and doc CRUD, with the image-proxy, file-name and doc-link-base-URL middlewares.
+ */
+export function createMarkdownImportJob({
+  collection,
+  schema,
+  preferredTitle,
+  fullPath,
+}: CreateMarkdownImportJobOptions) {
+  return new Transformer({
+    schema,
+    blobCRUD: collection.blobSync,
+    docCRUD: createCollectionDocCRUD(collection),
+    middlewares: [
+      defaultImageProxyMiddleware,
+      fileNameMiddleware(preferredTitle),
+      docLinkBaseURLMiddleware(collection.id),
+      ...(fullPath ? [filePathMiddleware(fullPath)] : []), // omitted when fullPath is undefined or ''
+    ],
+  });
+}
+
+type StageImportedAssetOptions = {
+  pendingAssets: AssetMap; // receives blob key -> File
+  pendingPathBlobIdMap: PathBlobIdMap; // receives path -> blob key
+  path: string; // map key; its last '.'-separated segment is treated as the extension
+  content: Blob; // bytes that are hashed and wrapped in the staged File
+  fileName: string; // name given to the staged File
+};
+
+/**
+ * Hashes `content` with `sha`, records `path -> key` in `pendingPathBlobIdMap`, and stores a `File` named `fileName` under that key in `pendingAssets`.
+ */
+export async function stageImportedAsset({
+  pendingAssets,
+  pendingPathBlobIdMap,
+  path,
+  content,
+  fileName,
+}: StageImportedAssetOptions) {
+  const ext = path.split('.').at(-1) ?? ''; // text after the last '.'; the whole path when it has no dot
+  const mime = extMimeMap.get(ext.toLowerCase()) ?? ''; // lower-cased lookup; '' when the extension is not in the map
+  const key = await sha(await content.arrayBuffer());
+  pendingPathBlobIdMap.set(path, key);
+  pendingAssets.set(key, new File([content], fileName, { type: mime }));
+}
+
+/**
+ * Copies every staged `path -> blobId` entry into the job's path/blobId map and every staged file into `job.assets`, then returns that map.
+ */
+export function bindImportedAssetsToJob(
+  job: Transformer,
+  pendingAssets: AssetMap,
+  pendingPathBlobIdMap: PathBlobIdMap
+) {
+  const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
+  // Walk every staged path -> blobId pair
+  for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
+    // The path is registered exactly as staged; nothing here re-bases it
+    // against the markdown file's location
+    pathBlobIdMap.set(assetPath, key);
+    // Register the file under its blobId in the job's assets
+    // (presumably block adapters later look the file up by blobId — confirm)
+    const assetFile = pendingAssets.get(key);
+    if (assetFile) {
+      job.assets.set(key, assetFile);
+    }
+  }
+
+  return pathBlobIdMap;
+}
+
 /**
 * Exports a doc to a Markdown file or a zip archive containing Markdown and assets.
 * @param doc The doc to export
@@ -329,19 +437,10 @@ async function importMarkdownToDoc({
  const { content, meta } = parseFrontmatter(markdown);
  const preferredTitle = meta.title ?? fileName;
  const provider = getProvider(extensions);
-  const job = new Transformer({
+  const job = createMarkdownImportJob({
+    collection,
    schema,
-    blobCRUD: collection.blobSync,
-    docCRUD: {
-      create: (id: string) => collection.createDoc(id).getStore({ id }),
-      get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
-      delete: (id: string) => collection.removeDoc(id),
-    },
-    middlewares: [
-      defaultImageProxyMiddleware,
-      fileNameMiddleware(preferredTitle),
-      docLinkBaseURLMiddleware(collection.id),
-    ],
+    preferredTitle,
  });
  const mdAdapter = new MarkdownAdapter(job, provider);
  const page = await mdAdapter.toDoc({
@@ -381,7 +480,7 @@ async function importMarkdownZip({
  // Iterate over all files in the zip
  for (const { path, content: blob } of unzip) {
    // Skip the files that are not markdown files
-    if (path.includes('__MACOSX') || path.includes('.DS_Store')) {
+    if (isSystemImportPath(path)) {
      continue;
    }

@@ -395,12 +494,13 @@ async function importMarkdownZip({
        fullPath: path,
      });
    } else {
-      // If the file is not a markdown file, store it to pendingAssets
-      const ext = path.split('.').at(-1) ?? '';
-      const mime = extMimeMap.get(ext) ?? '';
-      const key = await sha(await blob.arrayBuffer());
-      pendingPathBlobIdMap.set(path, key);
-      pendingAssets.set(key, new File([blob], fileName, { type: mime }));
+      await stageImportedAsset({
+        pendingAssets,
+        pendingPathBlobIdMap,
+        path,
+        content: blob,
+        fileName,
+      });
    }
  }

@@ -411,34 +511,13 @@ async function importMarkdownZip({
      const markdown = await contentBlob.text();
      const { content, meta } = parseFrontmatter(markdown);
      const preferredTitle = meta.title ?? fileNameWithoutExt;
-      const job = new Transformer({
+      const job = createMarkdownImportJob({
+        collection,
        schema,
-        blobCRUD: collection.blobSync,
-        docCRUD: {
-          create: (id: string) => collection.createDoc(id).getStore({ id }),
-          get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
-          delete: (id: string) => collection.removeDoc(id),
-        },
-        middlewares: [
-          defaultImageProxyMiddleware,
-          fileNameMiddleware(preferredTitle),
-          docLinkBaseURLMiddleware(collection.id),
-          filePathMiddleware(fullPath),
-        ],
+        preferredTitle,
+        fullPath,
      });
-      const assets = job.assets;
-      const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
-      // Iterate over all assets to be imported
-      for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
-        // Get the relative path of the asset to the markdown file
-        // Store the path to blobId map
-        pathBlobIdMap.set(assetPath, key);
-        // Store the asset to assets, the key is the blobId, the value is the file object
-        // In block adapter, it will use the blobId to get the file object
-        if (pendingAssets.get(key)) {
-          assets.set(key, pendingAssets.get(key)!);
-        }
-      }
+      bindImportedAssetsToJob(job, pendingAssets, pendingPathBlobIdMap);

      const mdAdapter = new MarkdownAdapter(job, provider);
      const doc = await mdAdapter.toDoc({