mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-03-24 16:18:39 +08:00
feat(editor): add obsidian vault import support (#14593)
fix #14592 ### Description > 🤖 **Note:** The code in this Pull Request were developed with the assistance of AI, but have been thoroughly reviewed and manually tested. > I noticed there's a check when opening an issue that asks _"Is your content generated by AI?"_, so I mention it here in case it's a deal breaker. If so I understand, you can close the PR, just wanted to share this in case it's useful anyways. This PR introduces **Obsidian Vault Import Support** to AFFiNE. Previously, users migrating from Obsidian had to rely on the generic Markdown importer, which often resulted in broken cross-links, missing directory structures, and metadata conflicts because Obsidian relies heavily on proprietary structures not supported by standard Markdown. This completely new feature makes migrating to AFFiNE easy. **Key Features & Implementations:** 1. **Vault (Directory) Selection** - Utilizes the `openDirectory` blocksuite utility in the import modal to allow users to select an entire folder directly from their filesystem, maintaining file context rather than forcing `.zip` uploads. 2. **Wikilink Resolution (Two-Pass Import)** - Restructured the `importObsidianVault` process into a two-pass architecture. - **Pass 1:** Discovers all files, assigns new AFFiNE document IDs, and maps them efficiently (by title, alias, and filename) into a high-performance hash map. - **Pass 2:** Processes the generic markdown AST and correctly maps custom `[[wikilinks]]` to the actual pre-registered AFFiNE blocksuite document IDs via `obsidianWikilinkToDeltaMatcher`. - Safely strips leading emojis from wikilink aliases to prevent duplicated page icons rendering mid-sentence. 3. **Emoji Metadata & State Fixes** - Implemented an aggressive, single-pass RegExp to extract multiple leading/combining emojis (`Emoji_Presentation` / `\ufe0f`) from H1 headers and Frontmatter. Emojis are assigned specifically to the page icon metadata property and cleanly stripped from the visual document title. 
- Fixed a core mutation bug where the loop iterating over existing `docMetas` was aggressively overwriting newly minted IDs for the current import batch. This fully resolves the issue where imported pages (especially re-imports) were incorrectly flagged as `trashed`. - Enforces explicit `trash: false` patch instructions. 4. **Syntax Conversion** - Implemented conversion of Obsidian-style Callouts (`> [!NOTE] Title`) into native AFFiNE block formats (`> 💡 **Title**`). - Hardened the `blockquote` parser so that nested structures (like `> - list items`) are fully preserved instead of discarded. ### UI Changes - Updated the Import Modal to include the "Import Obsidian Vault" flow utilizing the native filesystem directory picker. - Regenerated and synced `i18n-completenesses.json` correctly up to 100% across all supported locales for the new modal string additions. ### Testing Instructions 1. Navigate to the Workspace sidebar and click "Import". 2. Select "Obsidian" and use the directory picker to define a comprehensive Vault folder. 3. Validate that cross-links between documents automatically resolve to their specific AFFiNE instances. 4. Validate documents containing leading Emojis display exactly one Emoji (in the page icon area), and none duplicated in the actual title header. 5. Validate Callouts are rendered cleanly and correctly, and no documents are incorrectly marked as "Trash". <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Import Obsidian vaults with wikilink resolution, emoji/title preservation, asset handling, and automatic document creation. * Folder-based imports via a Directory Picker (with hidden-input fallback) integrated into the import dialog. * **Localization** * Added Obsidian import label and tooltip translations. * **Tests** * Added end-to-end tests validating Obsidian vault import and asset handling. 
<!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: DarkSky <25152247+darkskygit@users.noreply.github.com> Co-authored-by: DarkSky <darksky2048@gmail.com>
This commit is contained in:
committed by
GitHub
parent
8406f9656e
commit
0b1a44863f
@@ -7,6 +7,7 @@ import {
|
||||
NotionIcon,
|
||||
} from '@blocksuite/affine-components/icons';
|
||||
import {
|
||||
openDirectory,
|
||||
openFilesWith,
|
||||
openSingleFileWith,
|
||||
} from '@blocksuite/affine-shared/utils';
|
||||
@@ -18,11 +19,16 @@ import { query, state } from 'lit/decorators.js';
|
||||
import { HtmlTransformer } from '../transformers/html.js';
|
||||
import { MarkdownTransformer } from '../transformers/markdown.js';
|
||||
import { NotionHtmlTransformer } from '../transformers/notion-html.js';
|
||||
import { ObsidianTransformer } from '../transformers/obsidian.js';
|
||||
import { styles } from './styles.js';
|
||||
|
||||
export type OnSuccessHandler = (
|
||||
pageIds: string[],
|
||||
options: { isWorkspaceFile: boolean; importedCount: number }
|
||||
options: {
|
||||
isWorkspaceFile: boolean;
|
||||
importedCount: number;
|
||||
docEmojis?: Map<string, string>;
|
||||
}
|
||||
) => void;
|
||||
|
||||
export type OnFailHandler = (message: string) => void;
|
||||
@@ -140,6 +146,29 @@ export class ImportDoc extends WithDisposable(LitElement) {
|
||||
});
|
||||
}
|
||||
|
||||
private async _importObsidian() {
|
||||
const files = await openDirectory();
|
||||
if (!files || files.length === 0) return;
|
||||
const needLoading =
|
||||
files.reduce((acc, f) => acc + f.size, 0) > SHOW_LOADING_SIZE;
|
||||
if (needLoading) {
|
||||
this.hidden = false;
|
||||
this._loading = true;
|
||||
} else {
|
||||
this.abortController.abort();
|
||||
}
|
||||
const { docIds, docEmojis } = await ObsidianTransformer.importObsidianVault(
|
||||
{
|
||||
collection: this.collection,
|
||||
schema: this.schema,
|
||||
importedFiles: files,
|
||||
extensions: this.extensions,
|
||||
}
|
||||
);
|
||||
needLoading && this.abortController.abort();
|
||||
this._onImportSuccess(docIds, { docEmojis });
|
||||
}
|
||||
|
||||
private _onCloseClick(event: MouseEvent) {
|
||||
event.stopPropagation();
|
||||
this.abortController.abort();
|
||||
@@ -151,15 +180,21 @@ export class ImportDoc extends WithDisposable(LitElement) {
|
||||
|
||||
private _onImportSuccess(
|
||||
pageIds: string[],
|
||||
options: { isWorkspaceFile?: boolean; importedCount?: number } = {}
|
||||
options: {
|
||||
isWorkspaceFile?: boolean;
|
||||
importedCount?: number;
|
||||
docEmojis?: Map<string, string>;
|
||||
} = {}
|
||||
) {
|
||||
const {
|
||||
isWorkspaceFile = false,
|
||||
importedCount: pagesImportedCount = pageIds.length,
|
||||
docEmojis,
|
||||
} = options;
|
||||
this.onSuccess?.(pageIds, {
|
||||
isWorkspaceFile,
|
||||
importedCount: pagesImportedCount,
|
||||
docEmojis,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -258,6 +293,13 @@ export class ImportDoc extends WithDisposable(LitElement) {
|
||||
</affine-tooltip>
|
||||
</div>
|
||||
</icon-button>
|
||||
<icon-button
|
||||
class="button-item"
|
||||
text="Obsidian"
|
||||
@click="${this._importObsidian}"
|
||||
>
|
||||
${ExportToMarkdownIcon}
|
||||
</icon-button>
|
||||
<icon-button class="button-item" text="Coming soon..." disabled>
|
||||
${NewIcon}
|
||||
</icon-button>
|
||||
|
||||
@@ -2,6 +2,7 @@ export { DocxTransformer } from './docx.js';
|
||||
export { HtmlTransformer } from './html.js';
|
||||
export { MarkdownTransformer } from './markdown.js';
|
||||
export { NotionHtmlTransformer } from './notion-html.js';
|
||||
export { ObsidianTransformer } from './obsidian.js';
|
||||
export { PdfTransformer } from './pdf.js';
|
||||
export { createAssetsArchive, download } from './utils.js';
|
||||
export { ZipTransformer } from './zip.js';
|
||||
|
||||
@@ -21,8 +21,11 @@ import { extMimeMap, Transformer } from '@blocksuite/store';
|
||||
import type { AssetMap, ImportedFileEntry, PathBlobIdMap } from './type.js';
|
||||
import { createAssetsArchive, download, parseMatter, Unzip } from './utils.js';
|
||||
|
||||
type ParsedFrontmatterMeta = Partial<
|
||||
Pick<DocMeta, 'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite'>
|
||||
export type ParsedFrontmatterMeta = Partial<
|
||||
Pick<
|
||||
DocMeta,
|
||||
'title' | 'createDate' | 'updatedDate' | 'tags' | 'favorite' | 'trash'
|
||||
>
|
||||
>;
|
||||
|
||||
const FRONTMATTER_KEYS = {
|
||||
@@ -150,11 +153,18 @@ function buildMetaFromFrontmatter(
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (FRONTMATTER_KEYS.trash.includes(key)) {
|
||||
const trash = parseBoolean(value);
|
||||
if (trash !== undefined) {
|
||||
meta.trash = trash;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return meta;
|
||||
}
|
||||
|
||||
function parseFrontmatter(markdown: string): {
|
||||
export function parseFrontmatter(markdown: string): {
|
||||
content: string;
|
||||
meta: ParsedFrontmatterMeta;
|
||||
} {
|
||||
@@ -176,7 +186,7 @@ function parseFrontmatter(markdown: string): {
|
||||
}
|
||||
}
|
||||
|
||||
function applyMetaPatch(
|
||||
export function applyMetaPatch(
|
||||
collection: Workspace,
|
||||
docId: string,
|
||||
meta: ParsedFrontmatterMeta
|
||||
@@ -187,13 +197,14 @@ function applyMetaPatch(
|
||||
if (meta.updatedDate !== undefined) metaPatch.updatedDate = meta.updatedDate;
|
||||
if (meta.tags) metaPatch.tags = meta.tags;
|
||||
if (meta.favorite !== undefined) metaPatch.favorite = meta.favorite;
|
||||
if (meta.trash !== undefined) metaPatch.trash = meta.trash;
|
||||
|
||||
if (Object.keys(metaPatch).length) {
|
||||
collection.meta.setDocMeta(docId, metaPatch);
|
||||
}
|
||||
}
|
||||
|
||||
function getProvider(extensions: ExtensionType[]) {
|
||||
export function getProvider(extensions: ExtensionType[]) {
|
||||
const container = new Container();
|
||||
extensions.forEach(ext => {
|
||||
ext.setup(container);
|
||||
@@ -223,6 +234,103 @@ type ImportMarkdownZipOptions = {
|
||||
extensions: ExtensionType[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Filters hidden/system entries that should never participate in imports.
|
||||
*/
|
||||
export function isSystemImportPath(path: string) {
|
||||
return path.includes('__MACOSX') || path.includes('.DS_Store');
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the doc CRUD bridge used by importer transformers.
|
||||
*/
|
||||
export function createCollectionDocCRUD(collection: Workspace) {
|
||||
return {
|
||||
create: (id: string) => collection.createDoc(id).getStore({ id }),
|
||||
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
|
||||
delete: (id: string) => collection.removeDoc(id),
|
||||
};
|
||||
}
|
||||
|
||||
type CreateMarkdownImportJobOptions = {
  collection: Workspace;
  schema: Schema;
  // Title to prefer over the filename when naming the imported doc.
  preferredTitle?: string;
  // Full source path of the markdown file; enables relative-path resolution
  // for assets/links via filePathMiddleware.
  fullPath?: string;
};

/**
 * Creates a markdown import job with the standard collection middlewares.
 */
export function createMarkdownImportJob({
  collection,
  schema,
  preferredTitle,
  fullPath,
}: CreateMarkdownImportJobOptions) {
  return new Transformer({
    schema,
    blobCRUD: collection.blobSync,
    docCRUD: createCollectionDocCRUD(collection),
    middlewares: [
      defaultImageProxyMiddleware,
      fileNameMiddleware(preferredTitle),
      docLinkBaseURLMiddleware(collection.id),
      // Only installed when a source path is known (see fullPath above).
      ...(fullPath ? [filePathMiddleware(fullPath)] : []),
    ],
  });
}
|
||||
|
||||
type StageImportedAssetOptions = {
|
||||
pendingAssets: AssetMap;
|
||||
pendingPathBlobIdMap: PathBlobIdMap;
|
||||
path: string;
|
||||
content: Blob;
|
||||
fileName: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Hashes a non-markdown import file and stages it into the shared asset maps.
|
||||
*/
|
||||
export async function stageImportedAsset({
|
||||
pendingAssets,
|
||||
pendingPathBlobIdMap,
|
||||
path,
|
||||
content,
|
||||
fileName,
|
||||
}: StageImportedAssetOptions) {
|
||||
const ext = path.split('.').at(-1) ?? '';
|
||||
const mime = extMimeMap.get(ext.toLowerCase()) ?? '';
|
||||
const key = await sha(await content.arrayBuffer());
|
||||
pendingPathBlobIdMap.set(path, key);
|
||||
pendingAssets.set(key, new File([content], fileName, { type: mime }));
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds previously staged asset files into a transformer job before import.
|
||||
*/
|
||||
export function bindImportedAssetsToJob(
|
||||
job: Transformer,
|
||||
pendingAssets: AssetMap,
|
||||
pendingPathBlobIdMap: PathBlobIdMap
|
||||
) {
|
||||
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
|
||||
// Iterate over all assets to be imported
|
||||
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
|
||||
// Get the relative path of the asset to the markdown file
|
||||
// Store the path to blobId map
|
||||
pathBlobIdMap.set(assetPath, key);
|
||||
// Store the asset to assets, the key is the blobId, the value is the file object
|
||||
// In block adapter, it will use the blobId to get the file object
|
||||
const assetFile = pendingAssets.get(key);
|
||||
if (assetFile) {
|
||||
job.assets.set(key, assetFile);
|
||||
}
|
||||
}
|
||||
|
||||
return pathBlobIdMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exports a doc to a Markdown file or a zip archive containing Markdown and assets.
|
||||
* @param doc The doc to export
|
||||
@@ -329,19 +437,10 @@ async function importMarkdownToDoc({
|
||||
const { content, meta } = parseFrontmatter(markdown);
|
||||
const preferredTitle = meta.title ?? fileName;
|
||||
const provider = getProvider(extensions);
|
||||
const job = new Transformer({
|
||||
const job = createMarkdownImportJob({
|
||||
collection,
|
||||
schema,
|
||||
blobCRUD: collection.blobSync,
|
||||
docCRUD: {
|
||||
create: (id: string) => collection.createDoc(id).getStore({ id }),
|
||||
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
|
||||
delete: (id: string) => collection.removeDoc(id),
|
||||
},
|
||||
middlewares: [
|
||||
defaultImageProxyMiddleware,
|
||||
fileNameMiddleware(preferredTitle),
|
||||
docLinkBaseURLMiddleware(collection.id),
|
||||
],
|
||||
preferredTitle,
|
||||
});
|
||||
const mdAdapter = new MarkdownAdapter(job, provider);
|
||||
const page = await mdAdapter.toDoc({
|
||||
@@ -381,7 +480,7 @@ async function importMarkdownZip({
|
||||
// Iterate over all files in the zip
|
||||
for (const { path, content: blob } of unzip) {
|
||||
// Skip the files that are not markdown files
|
||||
if (path.includes('__MACOSX') || path.includes('.DS_Store')) {
|
||||
if (isSystemImportPath(path)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -395,12 +494,13 @@ async function importMarkdownZip({
|
||||
fullPath: path,
|
||||
});
|
||||
} else {
|
||||
// If the file is not a markdown file, store it to pendingAssets
|
||||
const ext = path.split('.').at(-1) ?? '';
|
||||
const mime = extMimeMap.get(ext) ?? '';
|
||||
const key = await sha(await blob.arrayBuffer());
|
||||
pendingPathBlobIdMap.set(path, key);
|
||||
pendingAssets.set(key, new File([blob], fileName, { type: mime }));
|
||||
await stageImportedAsset({
|
||||
pendingAssets,
|
||||
pendingPathBlobIdMap,
|
||||
path,
|
||||
content: blob,
|
||||
fileName,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -411,34 +511,13 @@ async function importMarkdownZip({
|
||||
const markdown = await contentBlob.text();
|
||||
const { content, meta } = parseFrontmatter(markdown);
|
||||
const preferredTitle = meta.title ?? fileNameWithoutExt;
|
||||
const job = new Transformer({
|
||||
const job = createMarkdownImportJob({
|
||||
collection,
|
||||
schema,
|
||||
blobCRUD: collection.blobSync,
|
||||
docCRUD: {
|
||||
create: (id: string) => collection.createDoc(id).getStore({ id }),
|
||||
get: (id: string) => collection.getDoc(id)?.getStore({ id }) ?? null,
|
||||
delete: (id: string) => collection.removeDoc(id),
|
||||
},
|
||||
middlewares: [
|
||||
defaultImageProxyMiddleware,
|
||||
fileNameMiddleware(preferredTitle),
|
||||
docLinkBaseURLMiddleware(collection.id),
|
||||
filePathMiddleware(fullPath),
|
||||
],
|
||||
preferredTitle,
|
||||
fullPath,
|
||||
});
|
||||
const assets = job.assets;
|
||||
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
|
||||
// Iterate over all assets to be imported
|
||||
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
|
||||
// Get the relative path of the asset to the markdown file
|
||||
// Store the path to blobId map
|
||||
pathBlobIdMap.set(assetPath, key);
|
||||
// Store the asset to assets, the key is the blobId, the value is the file object
|
||||
// In block adapter, it will use the blobId to get the file object
|
||||
if (pendingAssets.get(key)) {
|
||||
assets.set(key, pendingAssets.get(key)!);
|
||||
}
|
||||
}
|
||||
bindImportedAssetsToJob(job, pendingAssets, pendingPathBlobIdMap);
|
||||
|
||||
const mdAdapter = new MarkdownAdapter(job, provider);
|
||||
const doc = await mdAdapter.toDoc({
|
||||
|
||||
@@ -0,0 +1,732 @@
|
||||
import { FootNoteReferenceParamsSchema } from '@blocksuite/affine-model';
|
||||
import {
|
||||
BlockMarkdownAdapterExtension,
|
||||
createAttachmentBlockSnapshot,
|
||||
FULL_FILE_PATH_KEY,
|
||||
getImageFullPath,
|
||||
MarkdownAdapter,
|
||||
type MarkdownAST,
|
||||
MarkdownASTToDeltaExtension,
|
||||
normalizeFilePathReference,
|
||||
} from '@blocksuite/affine-shared/adapters';
|
||||
import type { AffineTextAttributes } from '@blocksuite/affine-shared/types';
|
||||
import type {
|
||||
DeltaInsert,
|
||||
ExtensionType,
|
||||
Schema,
|
||||
Workspace,
|
||||
} from '@blocksuite/store';
|
||||
import { extMimeMap, nanoid } from '@blocksuite/store';
|
||||
import type { Html, Text } from 'mdast';
|
||||
|
||||
import {
|
||||
applyMetaPatch,
|
||||
bindImportedAssetsToJob,
|
||||
createMarkdownImportJob,
|
||||
getProvider,
|
||||
isSystemImportPath,
|
||||
parseFrontmatter,
|
||||
stageImportedAsset,
|
||||
} from './markdown.js';
|
||||
import type {
|
||||
AssetMap,
|
||||
MarkdownFileImportEntry,
|
||||
PathBlobIdMap,
|
||||
} from './type.js';
|
||||
|
||||
// Maps Obsidian callout types (`> [!NOTE]`, `> [!warning]`, …) to the emoji
// used when rewriting them into AFFiNE-style callouts.
const CALLOUT_TYPE_MAP: Record<string, string> = {
  note: '💡',
  info: 'ℹ️',
  tip: '🔥',
  hint: '✅',
  important: '‼️',
  warning: '⚠️',
  caution: '⚠️',
  attention: '⚠️',
  danger: '⚠️',
  error: '🚨',
  bug: '🐛',
  example: '📌',
  quote: '💬',
  cite: '💬',
  abstract: '📋',
  summary: '📋',
  todo: '☑️',
  success: '✅',
  check: '✅',
  done: '✅',
  failure: '❌',
  fail: '❌',
  missing: '❌',
  question: '❓',
  help: '❓',
  faq: '❓',
};

// Sentinel stored in the page lookup map when two different pages claim the
// same lookup key; such keys are skipped during wikilink resolution.
const AMBIGUOUS_PAGE_LOOKUP = '__ambiguous__';
// Fallback emoji for callout types not present in CALLOUT_TYPE_MAP.
const DEFAULT_CALLOUT_EMOJI = '💡';
// Prefix of the data: URL used to carry plain-text footnote bodies.
const OBSIDIAN_TEXT_FOOTNOTE_URL_PREFIX = 'data:text/plain;charset=utf-8,';
// Tag inside the HTML comment used to smuggle attachment embeds through the
// markdown AST (see createObsidianAttach / parseObsidianAttach).
const OBSIDIAN_ATTACHMENT_EMBED_TAG = 'obsidian-attachment';
|
||||
|
||||
function normalizeLookupKey(value: string): string {
|
||||
return normalizeFilePathReference(value).toLowerCase();
|
||||
}
|
||||
|
||||
function stripMarkdownExtension(value: string): string {
|
||||
return value.replace(/\.md$/i, '');
|
||||
}
|
||||
|
||||
function basename(value: string): string {
|
||||
return normalizeFilePathReference(value).split('/').pop() ?? value;
|
||||
}
|
||||
|
||||
function parseObsidianTarget(rawTarget: string): {
|
||||
path: string;
|
||||
fragment: string | null;
|
||||
} {
|
||||
const normalizedTarget = normalizeFilePathReference(rawTarget);
|
||||
const match = normalizedTarget.match(/^([^#^]+)([#^].*)?$/);
|
||||
|
||||
return {
|
||||
path: match?.[1]?.trim() ?? normalizedTarget,
|
||||
fragment: match?.[2] ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
function extractTitleAndEmoji(rawTitle: string): {
|
||||
title: string;
|
||||
emoji: string | null;
|
||||
} {
|
||||
const SINGLE_LEADING_EMOJI_RE =
|
||||
/^[\s\u200b]*((?:[\p{Emoji_Presentation}\p{Extended_Pictographic}\u200b]|\u200d|\ufe0f)+)/u;
|
||||
|
||||
let currentTitle = rawTitle;
|
||||
let extractedEmojiClusters = '';
|
||||
let emojiMatch;
|
||||
|
||||
while ((emojiMatch = currentTitle.match(SINGLE_LEADING_EMOJI_RE))) {
|
||||
const matchedCluster = emojiMatch[1].trim();
|
||||
extractedEmojiClusters +=
|
||||
(extractedEmojiClusters ? ' ' : '') + matchedCluster;
|
||||
currentTitle = currentTitle.slice(emojiMatch[0].length);
|
||||
}
|
||||
|
||||
return {
|
||||
title: currentTitle.trim(),
|
||||
emoji: extractedEmojiClusters || null,
|
||||
};
|
||||
}
|
||||
|
||||
function preprocessTitleHeader(markdown: string): string {
|
||||
return markdown.replace(
|
||||
/^(\s*#\s+)(.*)$/m,
|
||||
(_, headerPrefix, titleContent) => {
|
||||
const { title: cleanTitle } = extractTitleAndEmoji(titleContent);
|
||||
return `${headerPrefix}${cleanTitle}`;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function preprocessObsidianCallouts(markdown: string): string {
|
||||
return markdown.replace(
|
||||
/^(> *)\[!([^\]\n]+)\]([+-]?)([^\n]*)/gm,
|
||||
(_, prefix, type, _fold, rest) => {
|
||||
const calloutToken =
|
||||
CALLOUT_TYPE_MAP[type.trim().toLowerCase()] ?? DEFAULT_CALLOUT_EMOJI;
|
||||
const title = rest.trim();
|
||||
return title
|
||||
? `${prefix}[!${calloutToken}] ${title}`
|
||||
: `${prefix}[!${calloutToken}]`;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function isStructuredFootnoteDefinition(content: string): boolean {
|
||||
try {
|
||||
return FootNoteReferenceParamsSchema.safeParse(JSON.parse(content.trim()))
|
||||
.success;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function splitFootnoteTextContent(content: string): {
|
||||
title: string;
|
||||
description?: string;
|
||||
} {
|
||||
const lines = content
|
||||
.split('\n')
|
||||
.map(line => line.trim())
|
||||
.filter(Boolean);
|
||||
const title = lines[0] ?? content.trim();
|
||||
const description = lines.slice(1).join('\n').trim();
|
||||
|
||||
return {
|
||||
title,
|
||||
...(description ? { description } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wraps a plain-text Obsidian footnote body into the structured JSON footnote
 * format, embedding the full text as a `data:text/plain` URL.
 */
function createTextFootnoteDefinition(content: string): string {
  const normalizedContent = content.trim();
  const { title, description } = splitFootnoteTextContent(normalizedContent);

  return JSON.stringify({
    type: 'url',
    // NOTE(review): the payload is percent-encoded twice — once for the
    // data: URL body and once for the `url` field as a whole. Presumably the
    // footnote consumer decodes the field once before use; confirm against
    // the footnote renderer.
    url: encodeURIComponent(
      `${OBSIDIAN_TEXT_FOOTNOTE_URL_PREFIX}${encodeURIComponent(
        normalizedContent
      )}`
    ),
    title,
    ...(description ? { description } : {}),
  });
}
|
||||
|
||||
/**
 * Scans the markdown line by line, removing footnote definitions
 * (`[^id]: body`, including indented continuation lines) from the content
 * and returning them separately, normalized to the structured format when
 * the body is plain text.
 */
function extractObsidianFootnotes(markdown: string): {
  content: string;
  footnotes: string[];
} {
  const lines = markdown.split('\n');
  const output: string[] = [];
  const footnotes: string[] = [];

  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index];
    // A definition starts with `[^identifier]:` at column 0.
    const match = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/);
    if (!match) {
      output.push(line);
      continue;
    }

    const identifier = match[1];
    const contentLines = [match[2]];

    // Consume continuation lines: indented by 1-4 spaces or a tab, with at
    // most one intervening blank line before further indented content.
    while (index + 1 < lines.length) {
      const nextLine = lines[index + 1];
      if (/^(?: {1,4}|\t)/.test(nextLine)) {
        contentLines.push(nextLine.replace(/^(?: {1,4}|\t)/, ''));
        index += 1;
        continue;
      }

      if (
        nextLine.trim() === '' &&
        index + 2 < lines.length &&
        /^(?: {1,4}|\t)/.test(lines[index + 2])
      ) {
        // Blank line inside the definition body — keep it and continue.
        contentLines.push('');
        index += 1;
        continue;
      }

      break;
    }

    const content = contentLines.join('\n').trim();
    // Empty or already-structured bodies pass through unchanged; plain text
    // is wrapped into the structured JSON footnote format.
    footnotes.push(
      `[^${identifier}]: ${
        !content || isStructuredFootnoteDefinition(content)
          ? content
          : createTextFootnoteDefinition(content)
      }`
    );
  }

  return { content: output.join('\n'), footnotes };
}
|
||||
|
||||
function buildLookupKeys(
|
||||
targetPath: string,
|
||||
currentFilePath?: string
|
||||
): string[] {
|
||||
const parsedTargetPath = normalizeFilePathReference(targetPath);
|
||||
if (!parsedTargetPath) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const keys = new Set<string>();
|
||||
const addPathVariants = (value: string) => {
|
||||
const normalizedValue = normalizeFilePathReference(value);
|
||||
if (!normalizedValue) {
|
||||
return;
|
||||
}
|
||||
|
||||
keys.add(normalizedValue);
|
||||
keys.add(stripMarkdownExtension(normalizedValue));
|
||||
|
||||
const fileName = basename(normalizedValue);
|
||||
keys.add(fileName);
|
||||
keys.add(stripMarkdownExtension(fileName));
|
||||
|
||||
const cleanTitle = extractTitleAndEmoji(
|
||||
stripMarkdownExtension(fileName)
|
||||
).title;
|
||||
if (cleanTitle) {
|
||||
keys.add(cleanTitle);
|
||||
}
|
||||
};
|
||||
|
||||
addPathVariants(parsedTargetPath);
|
||||
|
||||
if (currentFilePath) {
|
||||
addPathVariants(getImageFullPath(currentFilePath, parsedTargetPath));
|
||||
}
|
||||
|
||||
return Array.from(keys).map(normalizeLookupKey);
|
||||
}
|
||||
|
||||
function registerPageLookup(
|
||||
pageLookupMap: Map<string, string>,
|
||||
key: string,
|
||||
pageId: string
|
||||
) {
|
||||
const normalizedKey = normalizeLookupKey(key);
|
||||
if (!normalizedKey) {
|
||||
return;
|
||||
}
|
||||
|
||||
const existing = pageLookupMap.get(normalizedKey);
|
||||
if (existing && existing !== pageId) {
|
||||
pageLookupMap.set(normalizedKey, AMBIGUOUS_PAGE_LOOKUP);
|
||||
return;
|
||||
}
|
||||
|
||||
pageLookupMap.set(normalizedKey, pageId);
|
||||
}
|
||||
|
||||
function resolvePageIdFromLookup(
|
||||
pageLookupMap: Pick<ReadonlyMap<string, string>, 'get'>,
|
||||
rawTarget: string,
|
||||
currentFilePath?: string
|
||||
): string | null {
|
||||
const { path } = parseObsidianTarget(rawTarget);
|
||||
for (const key of buildLookupKeys(path, currentFilePath)) {
|
||||
const targetPageId = pageLookupMap.get(key);
|
||||
if (!targetPageId || targetPageId === AMBIGUOUS_PAGE_LOOKUP) {
|
||||
continue;
|
||||
}
|
||||
return targetPageId;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function resolveWikilinkDisplayTitle(
|
||||
rawAlias: string | undefined,
|
||||
pageEmoji: string | undefined
|
||||
): string | undefined {
|
||||
if (!rawAlias) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const { title: aliasTitle, emoji: aliasEmoji } =
|
||||
extractTitleAndEmoji(rawAlias);
|
||||
|
||||
if (aliasEmoji && aliasEmoji === pageEmoji) {
|
||||
return aliasTitle;
|
||||
}
|
||||
|
||||
return rawAlias;
|
||||
}
|
||||
|
||||
function isImageAssetPath(path: string): boolean {
|
||||
const extension = path.split('.').at(-1)?.toLowerCase() ?? '';
|
||||
return extMimeMap.get(extension)?.startsWith('image/') ?? false;
|
||||
}
|
||||
|
||||
function encodeMarkdownPath(path: string): string {
|
||||
return encodeURI(path).replaceAll('(', '%28').replaceAll(')', '%29');
|
||||
}
|
||||
|
||||
function escapeMarkdownLabel(label: string): string {
|
||||
return label.replace(/[[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
function isObsidianSizeAlias(alias: string | undefined): boolean {
|
||||
return !!alias && /^\d+(?:x\d+)?$/i.test(alias.trim());
|
||||
}
|
||||
|
||||
function getEmbedLabel(
|
||||
rawAlias: string | undefined,
|
||||
targetPath: string,
|
||||
fallbackToFileName: boolean
|
||||
): string {
|
||||
if (!rawAlias || isObsidianSizeAlias(rawAlias)) {
|
||||
return fallbackToFileName
|
||||
? stripMarkdownExtension(basename(targetPath))
|
||||
: '';
|
||||
}
|
||||
|
||||
return rawAlias.trim();
|
||||
}
|
||||
|
||||
type ObsidianAttachmentEmbed = {
|
||||
blobId: string;
|
||||
fileName: string;
|
||||
fileType: string;
|
||||
};
|
||||
|
||||
function createObsidianAttach(embed: ObsidianAttachmentEmbed): string {
|
||||
return `<!-- ${OBSIDIAN_ATTACHMENT_EMBED_TAG} ${encodeURIComponent(
|
||||
JSON.stringify(embed)
|
||||
)} -->`;
|
||||
}
|
||||
|
||||
function parseObsidianAttach(value: string): ObsidianAttachmentEmbed | null {
|
||||
const match = value.match(
|
||||
new RegExp(`^<!-- ${OBSIDIAN_ATTACHMENT_EMBED_TAG} ([^ ]+) -->$`)
|
||||
);
|
||||
if (!match?.[1]) return null;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(
|
||||
decodeURIComponent(match[1])
|
||||
) as ObsidianAttachmentEmbed;
|
||||
if (!parsed.blobId || !parsed.fileName) {
|
||||
return null;
|
||||
}
|
||||
return parsed;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function preprocessObsidianEmbeds(
|
||||
markdown: string,
|
||||
filePath: string,
|
||||
pageLookupMap: ReadonlyMap<string, string>,
|
||||
pathBlobIdMap: ReadonlyMap<string, string>
|
||||
): string {
|
||||
return markdown.replace(
|
||||
/!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]/g,
|
||||
(match, rawTarget: string, rawAlias?: string) => {
|
||||
const targetPageId = resolvePageIdFromLookup(
|
||||
pageLookupMap,
|
||||
rawTarget,
|
||||
filePath
|
||||
);
|
||||
if (targetPageId) {
|
||||
return `[[${rawTarget}${rawAlias ? `|${rawAlias}` : ''}]]`;
|
||||
}
|
||||
|
||||
const { path } = parseObsidianTarget(rawTarget);
|
||||
if (!path) {
|
||||
return match;
|
||||
}
|
||||
|
||||
const assetPath = getImageFullPath(filePath, path);
|
||||
const encodedPath = encodeMarkdownPath(assetPath);
|
||||
|
||||
if (isImageAssetPath(path)) {
|
||||
const alt = getEmbedLabel(rawAlias, path, false);
|
||||
return ``;
|
||||
}
|
||||
|
||||
const label = getEmbedLabel(rawAlias, path, true);
|
||||
const blobId = pathBlobIdMap.get(assetPath);
|
||||
if (!blobId) return `[${escapeMarkdownLabel(label)}](${encodedPath})`;
|
||||
|
||||
const extension = path.split('.').at(-1)?.toLowerCase() ?? '';
|
||||
return createObsidianAttach({
|
||||
blobId,
|
||||
fileName: basename(path),
|
||||
fileType: extMimeMap.get(extension) ?? '',
|
||||
});
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function preprocessObsidianMarkdown(
|
||||
markdown: string,
|
||||
filePath: string,
|
||||
pageLookupMap: ReadonlyMap<string, string>,
|
||||
pathBlobIdMap: ReadonlyMap<string, string>
|
||||
): string {
|
||||
const { content: contentWithoutFootnotes, footnotes: extractedFootnotes } =
|
||||
extractObsidianFootnotes(markdown);
|
||||
const content = preprocessObsidianEmbeds(
|
||||
contentWithoutFootnotes,
|
||||
filePath,
|
||||
pageLookupMap,
|
||||
pathBlobIdMap
|
||||
);
|
||||
const normalizedMarkdown = preprocessTitleHeader(
|
||||
preprocessObsidianCallouts(content)
|
||||
);
|
||||
|
||||
if (extractedFootnotes.length === 0) {
|
||||
return normalizedMarkdown;
|
||||
}
|
||||
|
||||
const trimmedMarkdown = normalizedMarkdown.replace(/\s+$/, '');
|
||||
return `${trimmedMarkdown}\n\n${extractedFootnotes.join('\n\n')}\n`;
|
||||
}
|
||||
|
||||
function isObsidianAttachmentEmbedNode(node: MarkdownAST): node is Html {
|
||||
return node.type === 'html' && !!parseObsidianAttach(node.value);
|
||||
}
|
||||
|
||||
/**
 * Block adapter that converts attachment placeholder comments (produced by
 * createObsidianAttach during preprocessing) into attachment block snapshots
 * during import. Import-only: fromMatch never matches.
 */
export const obsidianAttachmentEmbedMarkdownAdapterMatcher =
  BlockMarkdownAdapterExtension({
    flavour: 'obsidian:attachment-embed',
    toMatch: o => isObsidianAttachmentEmbedNode(o.node),
    fromMatch: () => false,
    toBlockSnapshot: {
      enter: (o, context) => {
        if (!isObsidianAttachmentEmbedNode(o.node)) {
          return;
        }

        const attachment = parseObsidianAttach(o.node.value);
        if (!attachment) {
          return;
        }

        // The staged file (if present) supplies size/type fallbacks for the
        // snapshot props.
        const assetFile = context.assets?.getAssets().get(attachment.blobId);
        context.walkerContext
          .openNode(
            createAttachmentBlockSnapshot({
              id: nanoid(),
              props: {
                name: attachment.fileName,
                size: assetFile?.size ?? 0,
                type:
                  attachment.fileType ||
                  assetFile?.type ||
                  'application/octet-stream',
                sourceId: attachment.blobId,
                embed: false,
                style: 'horizontalThin',
                footnoteIdentifier: null,
              },
            }),
            'children'
          )
          .closeNode();
        // Retag the consumed node so downstream html handling does not
        // process the placeholder a second time.
        (o.node as unknown as { type: string }).type =
          'obsidianAttachmentEmbed';
      },
    },
    fromBlockSnapshot: {},
  });
|
||||
|
||||
export const obsidianWikilinkToDeltaMatcher = MarkdownASTToDeltaExtension({
|
||||
name: 'obsidian-wikilink',
|
||||
match: ast => ast.type === 'text',
|
||||
toDelta: (ast, context) => {
|
||||
const textNode = ast as Text;
|
||||
if (!textNode.value) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const nodeContent = textNode.value;
|
||||
const wikilinkRegex = /\[\[([^\]|]+)(?:\|([^\]]+))?\]\]/g;
|
||||
const deltas: DeltaInsert<AffineTextAttributes>[] = [];
|
||||
|
||||
let lastProcessedIndex = 0;
|
||||
let linkMatch;
|
||||
|
||||
while ((linkMatch = wikilinkRegex.exec(nodeContent)) !== null) {
|
||||
if (linkMatch.index > lastProcessedIndex) {
|
||||
deltas.push({
|
||||
insert: nodeContent.substring(lastProcessedIndex, linkMatch.index),
|
||||
});
|
||||
}
|
||||
|
||||
const targetPageName = linkMatch[1].trim();
|
||||
const alias = linkMatch[2]?.trim();
|
||||
const currentFilePath = context.configs.get(FULL_FILE_PATH_KEY);
|
||||
const targetPageId = resolvePageIdFromLookup(
|
||||
{ get: key => context.configs.get(`obsidian:pageId:${key}`) },
|
||||
targetPageName,
|
||||
typeof currentFilePath === 'string' ? currentFilePath : undefined
|
||||
);
|
||||
|
||||
if (targetPageId) {
|
||||
const pageEmoji = context.configs.get(
|
||||
'obsidian:pageEmoji:' + targetPageId
|
||||
);
|
||||
const displayTitle = resolveWikilinkDisplayTitle(alias, pageEmoji);
|
||||
|
||||
deltas.push({
|
||||
insert: ' ',
|
||||
attributes: {
|
||||
reference: {
|
||||
type: 'LinkedPage',
|
||||
pageId: targetPageId,
|
||||
...(displayTitle ? { title: displayTitle } : {}),
|
||||
},
|
||||
},
|
||||
});
|
||||
} else {
|
||||
deltas.push({ insert: linkMatch[0] });
|
||||
}
|
||||
|
||||
lastProcessedIndex = wikilinkRegex.lastIndex;
|
||||
}
|
||||
|
||||
if (lastProcessedIndex < nodeContent.length) {
|
||||
deltas.push({ insert: nodeContent.substring(lastProcessedIndex) });
|
||||
}
|
||||
|
||||
return deltas;
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Inputs for {@link importObsidianVault}.
 */
export type ImportObsidianVaultOptions = {
  // Target workspace that receives the imported docs.
  collection: Workspace;
  // Blocksuite schema used to construct the import job for each file.
  schema: Schema;
  // All files selected from the vault directory (markdown notes + assets).
  importedFiles: File[];
  // Extra adapter extensions merged into the markdown adapter provider.
  extensions: ExtensionType[];
};
|
||||
|
||||
/**
 * Outcome of an Obsidian vault import.
 */
export type ImportObsidianVaultResult = {
  // Ids of every doc that was successfully created.
  docIds: string[];
  // Doc id -> leading emoji stripped from its title (used as page icon).
  docEmojis: Map<string, string>;
};
|
||||
|
||||
/**
 * Imports an entire Obsidian vault into the workspace.
 *
 * Two-pass design:
 *  1. Walk every file: stage non-markdown files as pending assets, and for
 *     each markdown note pre-assign an AFFiNE doc id, registering it in the
 *     page lookup map under every alias Obsidian may use (full path, path
 *     without extension, filename, bare filename, raw title, and
 *     emoji-stripped title) so wikilinks can resolve before any doc exists.
 *  2. Convert each markdown note with the markdown adapter, injecting the
 *     lookup and emoji maps through the job's adapter configs.
 *
 * @returns ids of the created docs plus the per-doc leading emojis
 *          extracted from their titles.
 */
export async function importObsidianVault({
  collection,
  schema,
  importedFiles,
  extensions,
}: ImportObsidianVaultOptions): Promise<ImportObsidianVaultResult> {
  // Adapter provider combining the Obsidian-specific matchers with any
  // caller-supplied extensions.
  const provider = getProvider([
    obsidianWikilinkToDeltaMatcher,
    obsidianAttachmentEmbedMarkdownAdapterMatcher,
    ...extensions,
  ]);

  const docIds: string[] = [];
  const docEmojis = new Map<string, string>();
  const pendingAssets: AssetMap = new Map();
  const pendingPathBlobIdMap: PathBlobIdMap = new Map();
  const markdownBlobs: MarkdownFileImportEntry[] = [];
  const pageLookupMap = new Map<string, string>();

  // ---- Pass 1: discover files, assign doc ids, stage assets. ----
  for (const file of importedFiles) {
    const filePath = file.webkitRelativePath || file.name;
    // Skip OS/system artifacts inside the vault directory.
    if (isSystemImportPath(filePath)) continue;

    if (file.name.endsWith('.md')) {
      const fileNameWithoutExt = file.name.replace(/\.[^/.]+$/, '');
      const markdown = await file.text();
      const { content, meta } = parseFrontmatter(markdown);

      // Frontmatter title takes precedence over the filename.
      const documentTitleCandidate = meta.title ?? fileNameWithoutExt;
      const { title: preferredTitle, emoji: leadingEmoji } =
        extractTitleAndEmoji(documentTitleCandidate);

      const newPageId = collection.idGenerator();
      // Register every alias form; registerPageLookup is responsible for
      // marking keys that collide across files as ambiguous.
      registerPageLookup(pageLookupMap, filePath, newPageId);
      registerPageLookup(
        pageLookupMap,
        stripMarkdownExtension(filePath),
        newPageId
      );
      registerPageLookup(pageLookupMap, file.name, newPageId);
      registerPageLookup(pageLookupMap, fileNameWithoutExt, newPageId);
      registerPageLookup(pageLookupMap, documentTitleCandidate, newPageId);
      registerPageLookup(pageLookupMap, preferredTitle, newPageId);

      if (leadingEmoji) {
        docEmojis.set(newPageId, leadingEmoji);
      }

      markdownBlobs.push({
        filename: file.name,
        contentBlob: file,
        fullPath: filePath,
        pageId: newPageId,
        preferredTitle,
        content,
        meta,
      });
    } else {
      // Non-markdown file: stage as an asset (blob) for later binding.
      await stageImportedAsset({
        pendingAssets,
        pendingPathBlobIdMap,
        path: filePath,
        content: file,
        fileName: file.name,
      });
    }
  }

  // Also allow wikilinks to resolve to docs that already exist in the
  // workspace, matched by title.
  for (const existingDocMeta of collection.meta.docMetas) {
    if (existingDocMeta.title) {
      registerPageLookup(
        pageLookupMap,
        existingDocMeta.title,
        existingDocMeta.id
      );
    }
  }

  // ---- Pass 2: convert every markdown note to a doc snapshot. ----
  await Promise.all(
    markdownBlobs.map(async markdownFile => {
      const {
        fullPath,
        pageId: predefinedId,
        preferredTitle,
        content,
        meta,
      } = markdownFile;

      const job = createMarkdownImportJob({
        collection,
        schema,
        preferredTitle,
        fullPath,
      });

      // Expose the lookup map to the wikilink matcher via adapter configs;
      // ambiguous keys are withheld so they fall back to literal text.
      for (const [lookupKey, id] of pageLookupMap.entries()) {
        if (id === AMBIGUOUS_PAGE_LOOKUP) {
          continue;
        }
        job.adapterConfigs.set(`obsidian:pageId:${lookupKey}`, id);
      }
      for (const [id, emoji] of docEmojis.entries()) {
        job.adapterConfigs.set('obsidian:pageEmoji:' + id, emoji);
      }

      // Attach the staged assets to this job and get path -> blobId map.
      const pathBlobIdMap = bindImportedAssetsToJob(
        job,
        pendingAssets,
        pendingPathBlobIdMap
      );

      // Rewrites embeds/callouts/footnotes/title header before parsing.
      const preprocessedMarkdown = preprocessObsidianMarkdown(
        content,
        fullPath,
        pageLookupMap,
        pathBlobIdMap
      );
      const mdAdapter = new MarkdownAdapter(job, provider);
      const snapshot = await mdAdapter.toDocSnapshot({
        file: preprocessedMarkdown,
        assets: job.assetsManager,
      });

      if (snapshot) {
        // Force the pre-assigned id so wikilink references stay valid.
        snapshot.meta.id = predefinedId;
        const doc = await job.snapshotToDoc(snapshot);
        if (doc) {
          applyMetaPatch(collection, doc.id, {
            ...meta,
            title: preferredTitle,
            trash: false,
          });
          docIds.push(doc.id);
        }
      }
    })
  );

  return { docIds, docEmojis };
}
|
||||
|
||||
export const ObsidianTransformer = {
|
||||
importObsidianVault,
|
||||
};
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { ParsedFrontmatterMeta } from './markdown.js';
|
||||
|
||||
/**
|
||||
* Represents an imported file entry in the zip archive
|
||||
*/
|
||||
@@ -10,6 +12,13 @@ export type ImportedFileEntry = {
|
||||
fullPath: string;
|
||||
};
|
||||
|
||||
export type MarkdownFileImportEntry = ImportedFileEntry & {
|
||||
pageId: string;
|
||||
preferredTitle: string;
|
||||
content: string;
|
||||
meta: ParsedFrontmatterMeta;
|
||||
};
|
||||
|
||||
/**
|
||||
* Map of asset hash to File object for all media files in the zip
|
||||
* Key: SHA hash of the file content (blobId)
|
||||
|
||||
Reference in New Issue
Block a user