diff --git a/packages/backend/server/src/__tests__/__fixtures__/test-doc-with-blob.snapshot.bin b/packages/backend/server/src/__tests__/__fixtures__/test-doc-with-blob.snapshot.bin new file mode 100644 index 0000000000..a69decc962 Binary files /dev/null and b/packages/backend/server/src/__tests__/__fixtures__/test-doc-with-blob.snapshot.bin differ diff --git a/packages/backend/server/src/__tests__/mocks/doc-snapshot.mock.ts b/packages/backend/server/src/__tests__/mocks/doc-snapshot.mock.ts index ac8ae5295e..9fcecd5082 100644 --- a/packages/backend/server/src/__tests__/mocks/doc-snapshot.mock.ts +++ b/packages/backend/server/src/__tests__/mocks/doc-snapshot.mock.ts @@ -12,6 +12,7 @@ export type MockDocSnapshotInput = { docId?: string; blob?: Uint8Array; updatedAt?: Date; + snapshotFile?: string; }; export type MockedDocSnapshot = Snapshot; @@ -23,7 +24,10 @@ export class MockDocSnapshot extends Mocker< override async create(input: MockDocSnapshotInput) { if (!input.blob) { const snapshot = await readFile( - path.join(import.meta.dirname, '../__fixtures__/test-doc.snapshot.bin') + path.join( + import.meta.dirname, + `../__fixtures__/${input.snapshotFile ?? 'test-doc.snapshot.bin'}` + ) ); input.blob = snapshot; } diff --git a/packages/backend/server/src/core/utils/__tests__/__snapshots__/blocksute.spec.ts.md b/packages/backend/server/src/core/utils/__tests__/__snapshots__/blocksute.spec.ts.md index cc8182962e..0b02895056 100644 --- a/packages/backend/server/src/core/utils/__tests__/__snapshots__/blocksute.spec.ts.md +++ b/packages/backend/server/src/core/utils/__tests__/__snapshots__/blocksute.spec.ts.md @@ -646,6 +646,93 @@ Generated by [AVA](https://avajs.dev). 
title: 'Write, Draw, Plan all at Once.', } +## can read blob filename from doc snapshot + +> Snapshot 1 + + { + blocks: [ + { + additional: { + displayMode: 'edgeless', + }, + blockId: '4YHKIhPzAK', + content: 'index file name', + docId: 'doc-0', + flavour: 'affine:page', + yblock: { + 'prop:title': 'index file name', + 'sys:children': [ + 'WypcCGdupE', + 'hZ1-cdLW5e', + ], + 'sys:flavour': 'affine:page', + 'sys:id': '4YHKIhPzAK', + 'sys:version': 2, + }, + }, + { + additional: { + displayMode: 'edgeless', + }, + blockId: 'WypcCGdupE', + content: [], + docId: 'doc-0', + flavour: 'affine:surface', + parentBlockId: '4YHKIhPzAK', + parentFlavour: 'affine:page', + yblock: { + 'prop:elements': { + type: '$blocksuite:internal:native$', + value: {}, + }, + 'sys:children': [], + 'sys:flavour': 'affine:surface', + 'sys:id': 'WypcCGdupE', + 'sys:version': 5, + }, + }, + { + additional: { + displayMode: 'page', + noteBlockId: 'hZ1-cdLW5e', + }, + blob: [ + 'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=', + ], + blockId: 'tfz1yFZdnn', + content: 'test file name here.txt', + docId: 'doc-0', + flavour: 'affine:attachment', + parentBlockId: 'hZ1-cdLW5e', + parentFlavour: 'affine:note', + yblock: { + 'prop:embed': false, + 'prop:footnoteIdentifier': null, + 'prop:index': 'a0', + 'prop:lockedBySelf': false, + 'prop:meta:createdAt': 1750036953927, + 'prop:meta:createdBy': '46ce597c-098a-4c61-a106-ce79827ec1de', + 'prop:meta:updatedAt': 1750036953928, + 'prop:meta:updatedBy': '46ce597c-098a-4c61-a106-ce79827ec1de', + 'prop:name': 'test file name here.txt', + 'prop:rotate': 0, + 'prop:size': 3, + 'prop:sourceId': 'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=', + 'prop:style': 'horizontalThin', + 'prop:type': 'text/plain', + 'prop:xywh': '[0,0,0,0]', + 'sys:children': [], + 'sys:flavour': 'affine:attachment', + 'sys:id': 'tfz1yFZdnn', + 'sys:version': 1, + }, + }, + ], + summary: '', + title: 'index file name', + } + ## can read all blocks from doc snapshot without workspace snapshot > 
// Parses the `test-doc-with-blob.snapshot.bin` fixture and snapshots every
// block read from it, including the attachment block that carries the blob id
// and the attachment's file name.
test('can read blob filename from doc snapshot', async t => {
  // Named differently from the suite-level `docSnapshot` so it is not shadowed.
  const snapshotWithBlob = await module.create(Mockers.DocSnapshot, {
    workspaceId: workspace.id,
    user: owner,
    snapshotFile: 'test-doc-with-blob.snapshot.bin',
  });
  const { blob } = snapshotWithBlob;

  const parsed = await readAllBlocksFromDocSnapshot(workspace.id, 'doc-0', blob);

  // NOTE: snapshotting the raw result hangs, so round-trip it through JSON
  // and snapshot the plain copy instead.
  const plainCopy = JSON.parse(JSON.stringify(parsed));
  t.snapshot(plainCopy);
});
// ---- packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts ----

// #region searchBlobNames()

// Indexes the doc snapshot fixture that contains a single attachment and
// checks that the attachment's blob id resolves to its file name.
test('should search blob names from doc snapshot work', async t => {
  const snapshotWithBlob = await module.create(Mockers.DocSnapshot, {
    workspaceId: workspace.id,
    user,
    snapshotFile: 'test-doc-with-blob.snapshot.bin',
  });

  await indexerService.indexDoc(workspace.id, snapshotWithBlob.id, {
    refresh: true,
  });

  const attachmentBlobId = 'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=';
  const blobNameMap = await indexerService.searchBlobNames(workspace.id, [
    attachmentBlobId,
  ]);

  t.snapshot(blobNameMap);
});

// Writes three attachment blocks plus one non-attachment block that reuses
// `blob3`, then checks that only the attachment names are returned and that an
// id with no indexed block (`blob4`) is simply absent from the map.
test('should search blob names work', async t => {
  const workspaceId = randomUUID();
  const blobId1 = 'blob1';
  const blobId2 = 'blob2';
  const blobId3 = 'blob3';
  const blobId4 = 'blob4';

  // Fields every written block shares; ids and timestamps are fresh per block.
  const commonFields = () => ({
    workspaceId,
    docId: randomUUID(),
    blockId: randomUUID(),
    createdByUserId: user.id,
    updatedByUserId: user.id,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  await indexerService.write(
    SearchTable.block,
    [
      {
        ...commonFields(),
        blob: blobId1,
        content: 'blob1 name.txt',
        flavour: 'affine:attachment',
      },
      {
        ...commonFields(),
        blob: blobId2,
        content: 'blob2 name.md',
        flavour: 'affine:attachment',
      },
      {
        ...commonFields(),
        blob: blobId3,
        content: 'blob3 name.docx',
        flavour: 'affine:attachment',
      },
      // no attachment
      {
        ...commonFields(),
        blob: blobId3,
        content: 'mock blob3 content',
        flavour: 'affine:page',
      },
    ],
    {
      refresh: true,
    }
  );

  const blobNameMap = await indexerService.searchBlobNames(workspaceId, [
    blobId1,
    blobId2,
    blobId3,
    blobId4,
  ]);

  t.is(blobNameMap.size, 3);
  const sortedEntries = Array.from(blobNameMap.entries()).sort((a, b) =>
    a[0].localeCompare(b[0])
  );
  t.snapshot(sortedEntries);
});

// Asking for no blob ids must yield an empty map rather than whatever an
// alternative-less `should` clause happens to match.
test('should search blob names return empty map without blob ids', async t => {
  const blobNameMap = await indexerService.searchBlobNames(workspace.id, []);

  t.is(blobNameMap.size, 0);
});

// #endregion

// ---- packages/backend/server/src/plugins/indexer/service.ts (IndexerService) ----

  /**
   * Resolve blob ids to the names of the attachments that reference them.
   *
   * Searches the block table for `affine:attachment` blocks of the given
   * workspace whose `blob` field matches one of `blobIds`, and maps each
   * matching blob id to the block's `content` field (the attachment file name
   * in the indexed fixtures).
   *
   * @param workspaceId workspace whose indexed blocks are searched
   * @param blobIds blob ids to look up; duplicates are collapsed before querying
   * @returns map from blob id to attachment name. Ids without an indexed
   *   attachment block are absent. When several hits share a blob id, the last
   *   hit wins.
   */
  async searchBlobNames(
    workspaceId: string,
    blobIds: string[]
  ): Promise<Map<string, string>> {
    const blobNameMap = new Map<string, string>();

    const uniqueBlobIds = Array.from(new Set(blobIds));
    if (uniqueBlobIds.length === 0) {
      // An empty id list would otherwise build a `should` clause with no
      // alternatives, whose matching behaviour is left to the search provider.
      // Nothing was asked for, so answer without querying.
      return blobNameMap;
    }

    const result = await this.search({
      table: SearchTable.block,
      query: {
        type: SearchQueryType.boolean,
        occur: SearchQueryOccur.must,
        queries: [
          {
            type: SearchQueryType.match,
            field: 'workspaceId',
            match: workspaceId,
          },
          {
            type: SearchQueryType.match,
            field: 'flavour',
            match: 'affine:attachment',
          },
          {
            // one alternative per requested blob id
            type: SearchQueryType.boolean,
            occur: SearchQueryOccur.should,
            queries: uniqueBlobIds.map(blobId => ({
              type: SearchQueryType.match,
              field: 'blob',
              match: blobId,
            })),
          },
        ],
      },
      options: {
        fields: ['blob', 'content'],
        pagination: {
          // NOTE(review): fixed page size; hits beyond it are silently dropped.
          limit: 10000,
        },
      },
    });

    for (const node of result.nodes) {
      // A hit may lack either field; skip it instead of throwing on `[0]`.
      const blobId: unknown = node.fields.blob?.[0];
      const name: unknown = node.fields.content?.[0];
      if (
        typeof blobId === 'string' &&
        blobId !== '' &&
        typeof name === 'string' &&
        name !== ''
      ) {
        blobNameMap.set(blobId, name);
      }
    }
    return blobNameMap;
  }

  #formatSearchNodes(nodes: SearchNode[]) {
    return nodes.map(node => ({
      ...node,