fix(editor): support relative image reference path when importing zip with images (#12264)

Closes: [BS-3385](https://linear.app/affine-design/issue/BS-3385/markdown类型的导入,支持media文件和md文件不在同目录的情况)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added utility functions to resolve and normalize image file paths in markdown and HTML imports.
  - Introduced middleware to provide full file path context during file import and transformation.
  - Added new types for improved asset and file management in zip imports.

- **Refactor**
  - Centralized and simplified image processing logic across HTML, Markdown, and Notion HTML adapters for improved maintainability.
  - Enhanced type safety and clarity in file and asset handling during zip imports.

- **Tests**
  - Added comprehensive tests for image file path resolution utility.

- **Documentation**
  - Improved inline code comments explaining file path resolution logic.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
donteatfriedrice
2025-05-14 02:30:30 +00:00
parent 2f8d8dbc1e
commit 26ece014f1
13 changed files with 320 additions and 215 deletions

View File

@@ -2,12 +2,11 @@ import { ImageBlockSchema } from '@blocksuite/affine-model';
import {
BlockHtmlAdapterExtension,
type BlockHtmlAdapterMatcher,
FetchUtils,
HastUtils,
} from '@blocksuite/affine-shared/adapters';
import { getFilenameFromContentDisposition } from '@blocksuite/affine-shared/utils';
import { sha } from '@blocksuite/global/utils';
import { getAssetName, nanoid } from '@blocksuite/store';
import { getAssetName } from '@blocksuite/store';
import { processImageNodeToBlock } from './utils';
export const imageBlockHtmlAdapterMatcher: BlockHtmlAdapterMatcher = {
flavour: ImageBlockSchema.model.flavour,
@@ -25,64 +24,10 @@ export const imageBlockHtmlAdapterMatcher: BlockHtmlAdapterMatcher = {
const image = o.node;
const imageURL =
typeof image?.properties.src === 'string' ? image.properties.src : '';
if (imageURL) {
let blobId = '';
if (!FetchUtils.fetchable(imageURL)) {
const imageURLSplit = imageURL.split('/');
while (imageURLSplit.length > 0) {
const key = assets
.getPathBlobIdMap()
.get(decodeURIComponent(imageURLSplit.join('/')));
if (key) {
blobId = key;
break;
}
imageURLSplit.shift();
}
} else {
try {
const res = await FetchUtils.fetchImage(
imageURL,
undefined,
configs.get('imageProxy') as string
);
if (!res) {
return;
}
const clonedRes = res.clone();
const name =
getFilenameFromContentDisposition(
res.headers.get('Content-Disposition') ?? ''
) ??
(imageURL.split('/').at(-1) ?? 'image') +
'.' +
(res.headers.get('Content-Type')?.split('/').at(-1) ?? 'png');
const file = new File([await res.blob()], name, {
type: res.headers.get('Content-Type') ?? '',
});
blobId = await sha(await clonedRes.arrayBuffer());
assets?.getAssets().set(blobId, file);
await assets?.writeToBlob(blobId);
} catch {
return;
}
}
walkerContext
.openNode(
{
type: 'block',
id: nanoid(),
flavour: 'affine:image',
props: {
sourceId: blobId,
},
children: [],
},
'children'
)
.closeNode();
walkerContext.skipAllChildren();
if (!imageURL) {
return;
}
await processImageNodeToBlock(imageURL, walkerContext, assets, configs);
},
},
fromBlockSnapshot: {

View File

@@ -2,12 +2,11 @@ import { ImageBlockSchema } from '@blocksuite/affine-model';
import {
BlockMarkdownAdapterExtension,
type BlockMarkdownAdapterMatcher,
FetchUtils,
type MarkdownAST,
} from '@blocksuite/affine-shared/adapters';
import { getFilenameFromContentDisposition } from '@blocksuite/affine-shared/utils';
import { sha } from '@blocksuite/global/utils';
import { getAssetName, nanoid } from '@blocksuite/store';
import { getAssetName } from '@blocksuite/store';
import { processImageNodeToBlock } from './utils';
const isImageNode = (node: MarkdownAST) => node.type === 'image';
@@ -18,63 +17,11 @@ export const imageBlockMarkdownAdapterMatcher: BlockMarkdownAdapterMatcher = {
toBlockSnapshot: {
enter: async (o, context) => {
const { configs, walkerContext, assets } = context;
let blobId = '';
const imageURL = 'url' in o.node ? o.node.url : '';
if (!assets || !imageURL) {
return;
}
if (!FetchUtils.fetchable(imageURL)) {
const imageURLSplit = imageURL.split('/');
while (imageURLSplit.length > 0) {
const key = assets
.getPathBlobIdMap()
.get(decodeURIComponent(imageURLSplit.join('/')));
if (key) {
blobId = key;
break;
}
imageURLSplit.shift();
}
} else {
const res = await FetchUtils.fetchImage(
imageURL,
undefined,
configs.get('imageProxy') as string
);
if (!res) {
return;
}
const clonedRes = res.clone();
const file = new File(
[await res.blob()],
getFilenameFromContentDisposition(
res.headers.get('Content-Disposition') ?? ''
) ??
(imageURL.split('/').at(-1) ?? 'image') +
'.' +
(res.headers.get('Content-Type')?.split('/').at(-1) ?? 'png'),
{
type: res.headers.get('Content-Type') ?? '',
}
);
blobId = await sha(await clonedRes.arrayBuffer());
assets?.getAssets().set(blobId, file);
await assets?.writeToBlob(blobId);
}
walkerContext
.openNode(
{
type: 'block',
id: nanoid(),
flavour: 'affine:image',
props: {
sourceId: blobId,
},
children: [],
},
'children'
)
.closeNode();
await processImageNodeToBlock(imageURL, walkerContext, assets, configs);
},
},
fromBlockSnapshot: {

View File

@@ -2,77 +2,10 @@ import { ImageBlockSchema } from '@blocksuite/affine-model';
import {
BlockNotionHtmlAdapterExtension,
type BlockNotionHtmlAdapterMatcher,
FetchUtils,
HastUtils,
} from '@blocksuite/affine-shared/adapters';
import { getFilenameFromContentDisposition } from '@blocksuite/affine-shared/utils';
import { sha } from '@blocksuite/global/utils';
import {
type AssetsManager,
type ASTWalkerContext,
type BlockSnapshot,
nanoid,
} from '@blocksuite/store';
async function processImageNode(
imageURL: string,
walkerContext: ASTWalkerContext<BlockSnapshot>,
assets: AssetsManager,
configs: Map<string, string>
) {
let blobId = '';
if (!FetchUtils.fetchable(imageURL)) {
const imageURLSplit = imageURL.split('/');
while (imageURLSplit.length > 0) {
const key = assets
.getPathBlobIdMap()
.get(decodeURIComponent(imageURLSplit.join('/')));
if (key) {
blobId = key;
break;
}
imageURLSplit.shift();
}
} else {
const res = await FetchUtils.fetchImage(
imageURL,
undefined,
configs.get('imageProxy') as string
);
if (!res) {
return;
}
const clonedRes = res.clone();
const name =
getFilenameFromContentDisposition(
res.headers.get('Content-Disposition') ?? ''
) ??
(imageURL.split('/').at(-1) ?? 'image') +
'.' +
(res.headers.get('Content-Type')?.split('/').at(-1) ?? 'png');
const file = new File([await res.blob()], name, {
type: res.headers.get('Content-Type') ?? '',
});
blobId = await sha(await clonedRes.arrayBuffer());
assets?.getAssets().set(blobId, file);
await assets?.writeToBlob(blobId);
}
walkerContext
.openNode(
{
type: 'block',
id: nanoid(),
flavour: ImageBlockSchema.model.flavour,
props: {
sourceId: blobId,
},
children: [],
},
'children'
)
.closeNode();
walkerContext.skipAllChildren();
}
import { processImageNodeToBlock } from './utils';
export const imageBlockNotionHtmlAdapterMatcher: BlockNotionHtmlAdapterMatcher =
{
@@ -107,7 +40,12 @@ export const imageBlockNotionHtmlAdapterMatcher: BlockNotionHtmlAdapterMatcher =
? image.properties.src
: '';
if (imageURL) {
await processImageNode(imageURL, walkerContext, assets, configs);
await processImageNodeToBlock(
imageURL,
walkerContext,
assets,
configs
);
}
break;
}
@@ -125,7 +63,12 @@ export const imageBlockNotionHtmlAdapterMatcher: BlockNotionHtmlAdapterMatcher =
: '';
}
if (imageURL) {
await processImageNode(imageURL, walkerContext, assets, configs);
await processImageNodeToBlock(
imageURL,
walkerContext,
assets,
configs
);
}
break;
}

View File

@@ -0,0 +1,89 @@
import { ImageBlockSchema } from '@blocksuite/affine-model';
import {
FetchUtils,
FULL_FILE_PATH_KEY,
getImageFullPath,
} from '@blocksuite/affine-shared/adapters';
import { getFilenameFromContentDisposition } from '@blocksuite/affine-shared/utils';
import { sha } from '@blocksuite/global/utils';
import {
type AssetsManager,
type ASTWalkerContext,
type BlockSnapshot,
nanoid,
} from '@blocksuite/store';
/**
 * Resolves an image reference from markdown/HTML into an image block
 * snapshot and appends it to the walker context.
 *
 * Local references (non-fetchable URLs) are looked up in the assets'
 * path->blobId map; remote references are downloaded, hashed and stored
 * as a new asset. If the image cannot be resolved, the node is skipped.
 *
 * @param imageURL - The raw image reference from the source document
 * @param walkerContext - AST walker context to emit the image block into
 * @param assets - Assets manager holding blobs and the path->blobId map
 * @param configs - Adapter configs (may carry FULL_FILE_PATH_KEY and 'imageProxy')
 */
export async function processImageNodeToBlock(
  imageURL: string,
  walkerContext: ASTWalkerContext<BlockSnapshot>,
  assets: AssetsManager,
  configs: Map<string, string>
) {
  let blobId = '';
  if (!FetchUtils.fetchable(imageURL)) {
    const fullFilePath = configs.get(FULL_FILE_PATH_KEY);
    // When importing a markdown/HTML file with assets in a zip file,
    // the image URL is a path relative to the imported file.
    if (fullFilePath) {
      // Pass the RAW reference through: getImageFullPath decodes
      // percent-escapes itself, so decoding here as well would
      // double-decode references such as "file%2520name.png".
      const imageFullPath = getImageFullPath(fullFilePath, imageURL);
      blobId = assets.getPathBlobIdMap().get(imageFullPath) ?? '';
    } else {
      // Legacy fallback when no file path context is available: strip
      // leading path segments until a suffix matches a known asset path.
      const imageURLSplit = imageURL.split('/');
      while (imageURLSplit.length > 0) {
        const key = assets
          .getPathBlobIdMap()
          .get(decodeURIComponent(imageURLSplit.join('/')));
        if (key) {
          blobId = key;
          break;
        }
        imageURLSplit.shift();
      }
    }
  } else {
    try {
      const res = await FetchUtils.fetchImage(
        imageURL,
        undefined,
        configs.get('imageProxy') as string
      );
      if (!res) {
        return;
      }
      // Clone before consuming: the body is read twice (blob + hash).
      const clonedRes = res.clone();
      // Prefer the server-provided filename; otherwise derive one from
      // the URL's last segment and the Content-Type subtype.
      const name =
        getFilenameFromContentDisposition(
          res.headers.get('Content-Disposition') ?? ''
        ) ??
        (imageURL.split('/').at(-1) ?? 'image') +
          '.' +
          (res.headers.get('Content-Type')?.split('/').at(-1) ?? 'png');
      const file = new File([await res.blob()], name, {
        type: res.headers.get('Content-Type') ?? '',
      });
      // The content hash doubles as the blob id.
      blobId = await sha(await clonedRes.arrayBuffer());
      assets.getAssets().set(blobId, file);
      await assets.writeToBlob(blobId);
    } catch (err) {
      console.error('Failed to process image:', err);
      return;
    }
  }
  walkerContext
    .openNode(
      {
        type: 'block',
        id: nanoid(),
        flavour: ImageBlockSchema.model.flavour,
        props: {
          sourceId: blobId,
        },
        children: [],
      },
      'children'
    )
    .closeNode();
  // The image node's children carry no additional content to convert.
  walkerContext.skipAllChildren();
}

View File

@@ -4,9 +4,9 @@ import remarkParse from 'remark-parse';
import { unified } from 'unified';
import { describe, expect, it } from 'vitest';
import { remarkGfm } from '../../../adapters/markdown/gfm';
import { remarkCallout } from '../../../adapters/markdown/remark-plugins';
import type { MarkdownAST } from '../../../adapters/markdown/type';
import { remarkGfm } from '../../../../adapters/markdown/gfm';
import { remarkCallout } from '../../../../adapters/markdown/remark-plugins/remark-callout';
import type { MarkdownAST } from '../../../../adapters/markdown/type';
describe('remarkCallout plugin', () => {
function isBlockQuote(node: MarkdownAST): node is Blockquote {

View File

@@ -0,0 +1,57 @@
import { describe, expect, it } from 'vitest';
import { getImageFullPath } from '../../../adapters/utils/file-path';
describe('getImageFullPath', () => {
it('should resolve relative image paths correctly', () => {
const filePath = 'path/to/markdown/file.md';
// Test relative path in same directory
expect(getImageFullPath(filePath, 'image.png')).toBe(
'path/to/markdown/image.png'
);
// Test relative path in subdirectory
expect(getImageFullPath(filePath, 'images/photo.jpg')).toBe(
'path/to/markdown/images/photo.jpg'
);
// Test relative path in subdirectory
expect(getImageFullPath(filePath, './images/photo.jpg')).toBe(
'path/to/markdown/images/photo.jpg'
);
// Test relative path with parent directory
expect(getImageFullPath(filePath, '../images/photo.jpg')).toBe(
'path/to/images/photo.jpg'
);
// Test relative path with multiple parent directories
expect(getImageFullPath(filePath, '../../images/photo.jpg')).toBe(
'path/images/photo.jpg'
);
// Test relative path with multiple parent directories (which is not supported)
expect(getImageFullPath(filePath, '../../../../images/photo.jpg')).toBe(
'images/photo.jpg'
);
});
it('should handle absolute image paths correctly', () => {
const filePath = 'path/to/markdown/file.md';
// Test absolute path
expect(getImageFullPath(filePath, '/images/photo.jpg')).toBe(
'images/photo.jpg'
);
});
it('should handle URL-encoded image paths correctly', () => {
const filePath = 'path/to/markdown/file.md';
// Test URL-encoded spaces
expect(getImageFullPath(filePath, 'my%20photo.jpg')).toBe(
'path/to/markdown/my photo.jpg'
);
});
});

View File

@@ -0,0 +1,14 @@
import type { TransformerMiddleware } from '@blocksuite/store';
export const FULL_FILE_PATH_KEY = 'fullFilePath';
/**
 * Creates a middleware that records the full (in-archive) path of the file
 * being imported, so block adapters can resolve relative asset references
 * against it.
 * @param filePath - The full file path of the imported file
 * @returns A TransformerMiddleware that stores the path in the adapter
 *   configs under {@link FULL_FILE_PATH_KEY}
 */
export const filePathMiddleware = (filePath: string): TransformerMiddleware => {
  return context => {
    context.adapterConfigs.set(FULL_FILE_PATH_KEY, filePath);
  };
};

View File

@@ -2,6 +2,7 @@ export * from './code';
export * from './copy';
export * from './doc-link';
export * from './file-name';
export * from './file-path';
export * from './paste';
export * from './proxy';
export * from './replace-id';

View File

@@ -0,0 +1,56 @@
/**
 * Decodes percent-escapes in a path, tolerating malformed input.
 * Image references are often URL-encoded (e.g. "my%20photo.jpg"), but a
 * literal '%' that is not part of a valid escape sequence (e.g.
 * "100% done.png") would make decodeURIComponent throw a URIError —
 * in that case fall back to the raw string instead of crashing the import.
 */
function safeDecodePath(path: string): string {
  try {
    return decodeURIComponent(path);
  } catch {
    return path;
  }
}
/**
 * Normalizes a relative path by resolving all relative path segments
 * @param basePath The base path (markdown file's directory)
 * @param relativePath The relative path to normalize
 * @returns The full path
 */
function resolveFullPath(basePath: string, relativePath: string): string {
  // Split both paths into segments; filter(Boolean) drops empty segments
  // produced by leading/trailing/duplicate slashes.
  const baseSegments = basePath.split('/').filter(Boolean);
  const relativeSegments = relativePath.split('/').filter(Boolean);
  // Handle each segment of the relative path
  for (const segment of relativeSegments) {
    if (segment === '.') {
      // Current directory, do nothing
      continue;
    } else if (segment === '..') {
      // Parent directory: remove last segment from base. Going above the
      // root clamps at the root rather than erroring.
      if (baseSegments.length > 0) {
        baseSegments.pop();
      }
    } else {
      // Regular directory or file, add to base
      baseSegments.push(segment);
    }
  }
  // Join segments back into a path
  return baseSegments.join('/');
}
/**
 * Get the full path of the reference image from the file path and the image reference
 * @param filePath The full path of the file containing the image reference
 * @param imageReference The image reference from the file (can be relative or absolute path)
 * @returns The full path of the reference image
 */
export function getImageFullPath(
  filePath: string,
  imageReference: string
): string {
  // Decode the image reference in case it contains URL-encoded characters
  // (tolerant of literal '%' characters that are not valid escapes).
  const decodedReference = safeDecodePath(imageReference);
  // Directory of the containing file; for a root-level file (no '/'),
  // substring(0, -1) clamps to '' so references resolve against the zip root.
  const markdownDir = filePath.substring(0, filePath.lastIndexOf('/'));
  // A reference starting with '/' is absolute (rooted at the zip root).
  const isRelative = !decodedReference.startsWith('/');
  // If the image reference is a relative path, resolve it against the file
  // path's directory; otherwise remove the leading slash.
  return isRelative
    ? resolveFullPath(markdownDir, decodedReference)
    : decodedReference.replace(/^\//, '');
}

View File

@@ -1,3 +1,4 @@
export * from './fetch.js';
export * from './file-path.js';
export * from './hast.js';
export * from './text.js';

View File

@@ -2,6 +2,7 @@ import {
defaultImageProxyMiddleware,
docLinkBaseURLMiddleware,
fileNameMiddleware,
filePathMiddleware,
HtmlAdapter,
titleMiddleware,
} from '@blocksuite/affine-shared/adapters';
@@ -15,6 +16,7 @@ import type {
} from '@blocksuite/store';
import { extMimeMap, Transformer } from '@blocksuite/store';
import type { AssetMap, ImportedFileEntry, PathBlobIdMap } from './type.js';
import { createAssetsArchive, download, Unzip } from './utils.js';
type ImportHTMLToDocOptions = {
@@ -143,9 +145,9 @@ async function importHTMLZip({
await unzip.load(imported);
const docIds: string[] = [];
const pendingAssets = new Map<string, File>();
const pendingPathBlobIdMap = new Map<string, string>();
const htmlBlobs: [string, Blob][] = [];
const pendingAssets: AssetMap = new Map();
const pendingPathBlobIdMap: PathBlobIdMap = new Map();
const htmlBlobs: ImportedFileEntry[] = [];
for (const { path, content: blob } of unzip) {
if (path.includes('__MACOSX') || path.includes('.DS_Store')) {
@@ -154,7 +156,11 @@ async function importHTMLZip({
const fileName = path.split('/').pop() ?? '';
if (fileName.endsWith('.html')) {
htmlBlobs.push([fileName, blob]);
htmlBlobs.push({
filename: fileName,
contentBlob: blob,
fullPath: path,
});
} else {
const ext = path.split('.').at(-1) ?? '';
const mime = extMimeMap.get(ext) ?? '';
@@ -165,8 +171,9 @@ async function importHTMLZip({
}
await Promise.all(
htmlBlobs.map(async ([fileName, blob]) => {
const fileNameWithoutExt = fileName.replace(/\.[^/.]+$/, '');
htmlBlobs.map(async htmlFile => {
const { filename, contentBlob, fullPath } = htmlFile;
const fileNameWithoutExt = filename.replace(/\.[^/.]+$/, '');
const job = new Transformer({
schema,
blobCRUD: collection.blobSync,
@@ -179,18 +186,19 @@ async function importHTMLZip({
defaultImageProxyMiddleware,
fileNameMiddleware(fileNameWithoutExt),
docLinkBaseURLMiddleware(collection.id),
filePathMiddleware(fullPath),
],
});
const assets = job.assets;
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
for (const [key, value] of pendingAssets.entries()) {
assets.set(key, value);
}
for (const [key, value] of pendingPathBlobIdMap.entries()) {
pathBlobIdMap.set(key, value);
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
pathBlobIdMap.set(assetPath, key);
if (pendingAssets.get(key)) {
assets.set(key, pendingAssets.get(key)!);
}
}
const htmlAdapter = new HtmlAdapter(job, provider);
const html = await blob.text();
const html = await contentBlob.text();
const doc = await htmlAdapter.toDoc({
file: html,
assets: job.assetsManager,

View File

@@ -2,6 +2,7 @@ import {
defaultImageProxyMiddleware,
docLinkBaseURLMiddleware,
fileNameMiddleware,
filePathMiddleware,
MarkdownAdapter,
titleMiddleware,
} from '@blocksuite/affine-shared/adapters';
@@ -16,6 +17,7 @@ import type {
} from '@blocksuite/store';
import { extMimeMap, Transformer } from '@blocksuite/store';
import type { AssetMap, ImportedFileEntry, PathBlobIdMap } from './type.js';
import { createAssetsArchive, download, Unzip } from './utils.js';
function getProvider(extensions: ExtensionType[]) {
@@ -196,19 +198,28 @@ async function importMarkdownZip({
await unzip.load(imported);
const docIds: string[] = [];
const pendingAssets = new Map<string, File>();
const pendingPathBlobIdMap = new Map<string, string>();
const markdownBlobs: [string, Blob][] = [];
const pendingAssets: AssetMap = new Map();
const pendingPathBlobIdMap: PathBlobIdMap = new Map();
const markdownBlobs: ImportedFileEntry[] = [];
// Iterate over all files in the zip
for (const { path, content: blob } of unzip) {
// Skip the files that are not markdown files
if (path.includes('__MACOSX') || path.includes('.DS_Store')) {
continue;
}
// Get the file name
const fileName = path.split('/').pop() ?? '';
// If the file is a markdown file, store it to markdownBlobs
if (fileName.endsWith('.md')) {
markdownBlobs.push([fileName, blob]);
markdownBlobs.push({
filename: fileName,
contentBlob: blob,
fullPath: path,
});
} else {
// If the file is not a markdown file, store it to pendingAssets
const ext = path.split('.').at(-1) ?? '';
const mime = extMimeMap.get(ext) ?? '';
const key = await sha(await blob.arrayBuffer());
@@ -218,8 +229,9 @@ async function importMarkdownZip({
}
await Promise.all(
markdownBlobs.map(async ([fileName, blob]) => {
const fileNameWithoutExt = fileName.replace(/\.[^/.]+$/, '');
markdownBlobs.map(async markdownFile => {
const { filename, contentBlob, fullPath } = markdownFile;
const fileNameWithoutExt = filename.replace(/\.[^/.]+$/, '');
const job = new Transformer({
schema,
blobCRUD: collection.blobSync,
@@ -232,18 +244,25 @@ async function importMarkdownZip({
defaultImageProxyMiddleware,
fileNameMiddleware(fileNameWithoutExt),
docLinkBaseURLMiddleware(collection.id),
filePathMiddleware(fullPath),
],
});
const assets = job.assets;
const pathBlobIdMap = job.assetsManager.getPathBlobIdMap();
for (const [key, value] of pendingAssets.entries()) {
assets.set(key, value);
}
for (const [key, value] of pendingPathBlobIdMap.entries()) {
pathBlobIdMap.set(key, value);
// Iterate over all assets to be imported
for (const [assetPath, key] of pendingPathBlobIdMap.entries()) {
// Get the relative path of the asset to the markdown file
// Store the path to blobId map
pathBlobIdMap.set(assetPath, key);
// Store the asset to assets, the key is the blobId, the value is the file object
// In block adapter, it will use the blobId to get the file object
if (pendingAssets.get(key)) {
assets.set(key, pendingAssets.get(key)!);
}
}
const mdAdapter = new MarkdownAdapter(job, provider);
const markdown = await blob.text();
const markdown = await contentBlob.text();
const doc = await mdAdapter.toDoc({
file: markdown,
assets: job.assetsManager,

View File

@@ -0,0 +1,25 @@
/**
 * A single file extracted from the imported zip archive.
 */
export interface ImportedFileEntry {
  /** Base name of the file (e.g. "document.md", "document.html") */
  filename: string;
  /** Blob holding the raw file content */
  contentBlob: Blob;
  /** Path of the file inside the zip archive */
  fullPath: string;
}
/**
 * Maps a media file's content hash (blobId) to its File object for all
 * media files in the zip.
 * Key: SHA hash of the file content (blobId)
 * Value: File object containing the actual media data
 */
export type AssetMap = Map<string, File>;
/**
 * Maps a file's original path inside the zip to its content hash.
 * Key: Original file path in the zip
 * Value: SHA hash of the file content (blobId)
 */
export type PathBlobIdMap = Map<string, string>;