feat(editor): add obsidian vault import support (#14593)

fix #14592 

### Description
> 🤖 **Note:** The code in this Pull Request were developed with the
assistance of AI, but have been thoroughly reviewed and manually tested.

> I noticed there's a check when opening an issue that asks _"Is your
content generated by AI?"_, so I mention it here in case it's a deal
breaker. If so I understand, you can close the PR, just wanted to share
this in case it's useful anyways.

This PR introduces **Obsidian Vault Import Support** to AFFiNE. 

Previously, users migrating from Obsidian had to rely on the generic
Markdown importer, which often resulted in broken cross-links, missing
directory structures, and metadata conflicts because Obsidian relies
heavily on proprietary structures not supported by standard Markdown.

This completely new feature makes migrating to AFFiNE easy.

**Key Features & Implementations:**

1. **Vault (Directory) Selection**
- Utilizes the `openDirectory` blocksuite utility in the import modal to
allow users to select an entire folder directly from their filesystem,
maintaining file context rather than forcing `.zip` uploads.

2. **Wikilink Resolution (Two-Pass Import)**
- Restructured the `importObsidianVault` process into a two-pass
architecture.
- **Pass 1:** Discovers all files, assigns new AFFiNE document IDs, and
maps them efficiently (by title, alias, and filename) into a
high-performance hash map.
- **Pass 2:** Processes the generic markdown AST and correctly maps
custom `[[wikilinks]]` to the actual pre-registered AFFiNE blocksuite
document IDs via `obsidianWikilinkToDeltaMatcher`.
- Safely strips leading emojis from wikilink aliases to prevent
duplicated page icons rendering mid-sentence.

3. **Emoji Metadata & State Fixes**
- Implemented an aggressive, single-pass RegExp to extract multiple
leading/combining emojis (`Emoji_Presentation` / `\ufe0f`) from H1
headers and Frontmatter. Emojis are assigned specifically to the page
icon metadata property and cleanly stripped from the visual document
title.
- Fixed a core mutation bug where the loop iterating over existing
`docMetas` was aggressively overwriting newly minted IDs for the current
import batch. This fully resolves the issue where imported pages
(especially re-imports) were incorrectly flagged as `trashed`.
   - Enforces explicit `trash: false` patch instructions.

4. **Syntax Conversion**
- Implemented conversion of Obsidian-style Callouts (`> [!NOTE] Title`)
into native AFFiNE block formats (`> 💡 **Title**`).
- Hardened the `blockquote` parser so that nested structures (like `> -
list items`) are fully preserved instead of discarded.

### UI Changes
- Updated the Import Modal to include the "Import Obsidian Vault" flow
utilizing the native filesystem directory picker.
- Regenerated and synced `i18n-completenesses.json` correctly up to 100%
across all supported locales for the new modal string additions.

### Testing Instructions
1. Navigate to the Workspace sidebar and click "Import".
2. Select "Obsidian" and use the directory picker to define a
comprehensive Vault folder.
3. Validate that cross-links between documents automatically resolve to
their specific AFFiNE instances.
4. Validate documents containing leading Emojis display exactly one
Emoji (in the page icon area), and none duplicated in the actual title
header.
5. Validate Callouts are rendered cleanly and correctly, and no
documents are incorrectly marked as "Trash".


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Import Obsidian vaults with wikilink resolution, emoji/title
preservation, asset handling, and automatic document creation.
* Folder-based imports via a Directory Picker (with hidden-input
fallback) integrated into the import dialog.

* **Localization**
  * Added Obsidian import label and tooltip translations.

* **Tests**
* Added end-to-end tests validating Obsidian vault import and asset
handling.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com>
Co-authored-by: DarkSky <darksky2048@gmail.com>
This commit is contained in:
Francisco Jiménez
2026-03-16 10:49:17 -06:00
committed by GitHub
parent 8406f9656e
commit 0b1a44863f
21 changed files with 1515 additions and 160 deletions

View File

@@ -21,8 +21,11 @@ import { extMimeMap, Transformer } from '@blocksuite/store';
import type { AssetMap, ImportedFileEntry, PathBlobIdMap } from './type.js';
import { createAssetsArchive, download, parseMatter, Unzip } from './utils.js';
type ParsedFrontmatterMeta = Partial<
Pick<DocMeta, 'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite'>
export type ParsedFrontmatterMeta = Partial<
Pick<
DocMeta,
'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite' | 'trash'
>
>;
const FRONTMATTER_KEYS = {
@@ -150,11 +153,18 @@ function buildMetaFromFrontmatter(
}
continue;
}
if (FRONTMATTER_KEYS.trash.includes(key)) {
const trash = parseBoolean(value);
if (trash !== undefined) {
meta.trash = trash;
}
continue;
}
}
return meta;
}
function parseFrontmatter(markdown: string): {
export function parseFrontmatter(markdown: string): {
content: string;
meta: ParsedFrontmatterMeta;
} {
@@ -176,7 +186,7 @@ function parseFrontmatter(markdown: string): {
}
}
function applyMetaPatch(
export function applyMetaPatch(
collection: Workspace,
docId: string,
meta: ParsedFrontmatterMeta
@@ -187,13 +197,14 @@ function applyMetaPatch(
if (meta.updatedDate !== undefined) metaPatch.updatedDate = meta.updatedDate;
if (meta.tags) metaPatch.tags = meta.tags;
if (meta.favorite !== undefined) metaPatch.favorite = meta.favorite;
if (meta.trash !== undefined) metaPatch.trash = meta.trash;
if (Object.keys(metaPatch).length) {
collection.meta.setDocMeta(docId, metaPatch);
}
}
function getProvider(extensions: ExtensionType[]) {
export function getProvider(extensions: ExtensionType[]) {
const container = new Container();
extensions.forEach(ext => {
ext.setup(container);
@@ -223,6 +234,103 @@ type ImportMarkdownZipOptions = {
extensions: ExtensionType[];
};
/**
* Filters hidden/system entries that should never participate in imports.
*/
export function isSystemImportPath(path: string) {
return path.includes('__MACOSX') || path.includes('.DS_Store');
}
/**
* Creates the doc CRUD bridge used by importer transformers.
*/
export function createCollectionDocCRUD(collection: Workspace) {
return {
create: (id: string) => collection.createDoc(id).getStore({ id }),
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
delete: (id: string) => collection.removeDoc(id),
};
}
type CreateMarkdownImportJobOptions = {
collection: Workspace;
schema: Schema;
preferredTitle?: string;
fullPath?: string;
};
/**
* Creates a markdown import job with the standard collection middlewares.
*/
export function createMarkdownImportJob({
collection,
schema,
preferredTitle,
fullPath,
}: CreateMarkdownImportJobOptions) {
return new Transformer({
schema,
blobCRUD: collection.blobSync,
docCRUD: createCollectionDocCRUD(collection),
middlewares: [
defaultImageProxyMiddleware,
fileNameMiddleware(preferredTitle),
docLinkBaseURLMiddleware(collection.id),
...(fullPath ? [filePathMiddleware(fullPath)] : []),
],
});
}
type StageImportedAssetOptions = {
pendingAssets: AssetMap;
pendingPathBlobIdMap: PathBlobIdMap;
path: string;
content: Blob;
fileName: string;
};
/**
* Hashes a non-markdown import file and stages it into the shared asset maps.
*/
export async function stageImportedAsset({
pendingAssets,
pendingPathBlobIdMap,
path,
content,
fileName,
}: StageImportedAssetOptions) {
const ext = path.split('.').at(-1) ?? '';
const mime = extMimeMap.get(ext.toLowerCase()) ?? '';
const key = await sha(await content.arrayBuffer());
pendingPathBlobIdMap.set(path, key);
pendingAssets.set(key, new File([content], fileName, { type: mime }));
}
/**
* Binds previously staged asset files into a transformer job before import.
*/
export function bindImportedAssetsToJob(
job: Transformer,
pendingAssets: AssetMap,
pendingPathBlobIdMap: PathBlobIdMap
) {
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
// Iterate over all assets to be imported
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
// Get the relative path of the asset to the markdown file
// Store the path to blobId map
pathBlobIdMap.set(assetPath, key);
// Store the asset to assets, the key is the blobId, the value is the file object
// In block adapter, it will use the blobId to get the file object
const assetFile = pendingAssets.get(key);
if (assetFile) {
job.assets.set(key, assetFile);
}
}
return pathBlobIdMap;
}
/**
* Exports a doc to a Markdown file or a zip archive containing Markdown and assets.
* @param doc The doc to export
@@ -329,19 +437,10 @@ async function importMarkdownToDoc({
const { content, meta } = parseFrontmatter(markdown);
const preferredTitle = meta.title ?? fileName;
const provider = getProvider(extensions);
const job = new Transformer({
const job = createMarkdownImportJob({
collection,
schema,
blobCRUD: collection.blobSync,
docCRUD: {
create: (id: string) => collection.createDoc(id).getStore({ id }),
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
delete: (id: string) => collection.removeDoc(id),
},
middlewares: [
defaultImageProxyMiddleware,
fileNameMiddleware(preferredTitle),
docLinkBaseURLMiddleware(collection.id),
],
preferredTitle,
});
const mdAdapter = new MarkdownAdapter(job, provider);
const page = await mdAdapter.toDoc({
@@ -381,7 +480,7 @@ async function importMarkdownZip({
// Iterate over all files in the zip
for (const { path, content: blob } of unzip) {
// Skip the files that are not markdown files
if (path.includes('__MACOSX') || path.includes('.DS_Store')) {
if (isSystemImportPath(path)) {
continue;
}
@@ -395,12 +494,13 @@ async function importMarkdownZip({
fullPath: path,
});
} else {
// If the file is not a markdown file, store it to pendingAssets
const ext = path.split('.').at(-1) ?? '';
const mime = extMimeMap.get(ext) ?? '';
const key = await sha(await blob.arrayBuffer());
pendingPathBlobIdMap.set(path, key);
pendingAssets.set(key, new File([blob], fileName, { type: mime }));
await stageImportedAsset({
pendingAssets,
pendingPathBlobIdMap,
path,
content: blob,
fileName,
});
}
}
@@ -411,34 +511,13 @@ async function importMarkdownZip({
const markdown = await contentBlob.text();
const { content, meta } = parseFrontmatter(markdown);
const preferredTitle = meta.title ?? fileNameWithoutExt;
const job = new Transformer({
const job = createMarkdownImportJob({
collection,
schema,
blobCRUD: collection.blobSync,
docCRUD: {
create: (id: string) => collection.createDoc(id).getStore({ id }),
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
delete: (id: string) => collection.removeDoc(id),
},
middlewares: [
defaultImageProxyMiddleware,
fileNameMiddleware(preferredTitle),
docLinkBaseURLMiddleware(collection.id),
filePathMiddleware(fullPath),
],
preferredTitle,
fullPath,
});
const assets = job.assets;
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
// Iterate over all assets to be imported
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
// Get the relative path of the asset to the markdown file
// Store the path to blobId map
pathBlobIdMap.set(assetPath, key);
// Store the asset to assets, the key is the blobId, the value is the file object
// In block adapter, it will use the blobId to get the file object
if (pendingAssets.get(key)) {
assets.set(key, pendingAssets.get(key)!);
}
}
bindImportedAssetsToJob(job, pendingAssets, pendingPathBlobIdMap);
const mdAdapter = new MarkdownAdapter(job, provider);
const doc = await mdAdapter.toDoc({