feat(server): search blob names from indexer (#12822)

#### PR Dependency Tree


* **PR #12822** 👈

This tree was auto-generated by
[Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added the ability to search for blob names by their IDs within a
workspace.
- **Tests**
- Introduced new test cases and snapshot tests to validate searching
blob names and reading filenames from document snapshots.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2
2025-06-16 16:34:23 +08:00
committed by GitHub
parent 4f75111055
commit c0c813edfd
9 changed files with 282 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ export type MockDocSnapshotInput = {
docId?: string;
blob?: Uint8Array;
updatedAt?: Date;
snapshotFile?: string;
};
export type MockedDocSnapshot = Snapshot;
@@ -23,7 +24,10 @@ export class MockDocSnapshot extends Mocker<
override async create(input: MockDocSnapshotInput) {
if (!input.blob) {
const snapshot = await readFile(
path.join(import.meta.dirname, '../__fixtures__/test-doc.snapshot.bin')
path.join(
import.meta.dirname,
`../__fixtures__/${input.snapshotFile ?? 'test-doc.snapshot.bin'}`
)
);
input.blob = snapshot;
}

View File

@@ -646,6 +646,93 @@ Generated by [AVA](https://avajs.dev).
title: 'Write, Draw, Plan all at Once.',
}
## can read blob filename from doc snapshot
> Snapshot 1
{
blocks: [
{
additional: {
displayMode: 'edgeless',
},
blockId: '4YHKIhPzAK',
content: 'index file name',
docId: 'doc-0',
flavour: 'affine:page',
yblock: {
'prop:title': 'index file name',
'sys:children': [
'WypcCGdupE',
'hZ1-cdLW5e',
],
'sys:flavour': 'affine:page',
'sys:id': '4YHKIhPzAK',
'sys:version': 2,
},
},
{
additional: {
displayMode: 'edgeless',
},
blockId: 'WypcCGdupE',
content: [],
docId: 'doc-0',
flavour: 'affine:surface',
parentBlockId: '4YHKIhPzAK',
parentFlavour: 'affine:page',
yblock: {
'prop:elements': {
type: '$blocksuite:internal:native$',
value: {},
},
'sys:children': [],
'sys:flavour': 'affine:surface',
'sys:id': 'WypcCGdupE',
'sys:version': 5,
},
},
{
additional: {
displayMode: 'page',
noteBlockId: 'hZ1-cdLW5e',
},
blob: [
'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=',
],
blockId: 'tfz1yFZdnn',
content: 'test file name here.txt',
docId: 'doc-0',
flavour: 'affine:attachment',
parentBlockId: 'hZ1-cdLW5e',
parentFlavour: 'affine:note',
yblock: {
'prop:embed': false,
'prop:footnoteIdentifier': null,
'prop:index': 'a0',
'prop:lockedBySelf': false,
'prop:meta:createdAt': 1750036953927,
'prop:meta:createdBy': '46ce597c-098a-4c61-a106-ce79827ec1de',
'prop:meta:updatedAt': 1750036953928,
'prop:meta:updatedBy': '46ce597c-098a-4c61-a106-ce79827ec1de',
'prop:name': 'test file name here.txt',
'prop:rotate': 0,
'prop:size': 3,
'prop:sourceId': 'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=',
'prop:style': 'horizontalThin',
'prop:type': 'text/plain',
'prop:xywh': '[0,0,0,0]',
'sys:children': [],
'sys:flavour': 'affine:attachment',
'sys:id': 'tfz1yFZdnn',
'sys:version': 1,
},
},
],
summary: '',
title: 'index file name',
}
## can read all blocks from doc snapshot without workspace snapshot
> Snapshot 1

View File

@@ -56,6 +56,23 @@ test('can read all blocks from doc snapshot', async t => {
});
});
// Reads every block out of a fixture snapshot that contains an attachment
// block and snapshots the parsed result.
test('can read blob filename from doc snapshot', async t => {
  const snapshotWithBlob = await module.create(Mockers.DocSnapshot, {
    snapshotFile: 'test-doc-with-blob.snapshot.bin',
    user: owner,
    workspaceId: workspace.id,
  });

  const parsed = await readAllBlocksFromDocSnapshot(
    workspace.id,
    'doc-0',
    snapshotWithBlob.blob
  );

  // NOTE: snapshot a JSON round-tripped copy; passing the raw result to
  // t.snapshot() directly causes the test to hang.
  const plainCopy = JSON.parse(JSON.stringify(parsed));
  t.snapshot(plainCopy);
});
test('can read all blocks from doc snapshot without workspace snapshot', async t => {
const doc = await models.doc.get(workspace.id, docSnapshot.id);
t.truthy(doc);

View File

@@ -494,3 +494,30 @@ Generated by [AVA](https://avajs.dev).
],
},
]
## should search blob names from doc snapshot work
> Snapshot 1
Map {
'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=' => 'test file name here.txt',
}
## should search blob names work
> Snapshot 1
[
[
'blob1',
'blob1 name.txt',
],
[
'blob2',
'blob2 name.md',
],
[
'blob3',
'blob3 name.docx',
],
]

View File

@@ -2113,3 +2113,103 @@ test('should index doc work', async t => {
t.is(module.event.count('doc.indexer.updated'), count + 1);
});
// #endregion
// #region searchBlobNames()
// Indexes a fixture doc snapshot that contains one attachment block, then
// looks up that attachment's blob id through searchBlobNames().
test('should search blob names from doc snapshot work', async t => {
  const attachmentBlobId = 'ldZMrM4PDlsNG4Q4YvCsz623h6TKu4qI9_FpTqIypfw=';

  const snapshotWithBlob = await module.create(Mockers.DocSnapshot, {
    snapshotFile: 'test-doc-with-blob.snapshot.bin',
    user,
    workspaceId: workspace.id,
  });

  // refresh: true is passed so the lookup below runs against the freshly
  // indexed doc.
  await indexerService.indexDoc(workspace.id, snapshotWithBlob.id, {
    refresh: true,
  });

  const names = await indexerService.searchBlobNames(workspace.id, [
    attachmentBlobId,
  ]);

  t.snapshot(names);
});
// Writes three attachment blocks plus one non-attachment block directly into
// the block table, then checks that searchBlobNames() only reports the
// attachment blocks and ignores a blob id that was never indexed.
test('should search blob names work', async t => {
  const workspaceId = randomUUID();
  const [blobId1, blobId2, blobId3, blobId4] = [
    'blob1',
    'blob2',
    'blob3',
    'blob4',
  ];

  // Builds one block record; every record gets its own doc id, block id and
  // timestamps.
  const makeBlock = (blob: string, content: string, flavour: string) => ({
    workspaceId,
    blob,
    content,
    flavour,
    docId: randomUUID(),
    blockId: randomUUID(),
    createdByUserId: user.id,
    updatedByUserId: user.id,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  await indexerService.write(
    SearchTable.block,
    [
      makeBlock(blobId1, 'blob1 name.txt', 'affine:attachment'),
      makeBlock(blobId2, 'blob2 name.md', 'affine:attachment'),
      makeBlock(blobId3, 'blob3 name.docx', 'affine:attachment'),
      // not an attachment block, even though it carries the blob3 id
      makeBlock(blobId3, 'mock blob3 content', 'affine:page'),
    ],
    {
      refresh: true,
    }
  );

  // blobId4 was never written, so only three entries are expected back.
  const names = await indexerService.searchBlobNames(workspaceId, [
    blobId1,
    blobId2,
    blobId3,
    blobId4,
  ]);

  t.is(names.size, 3);

  // Sort by blob id before snapshotting the entries.
  const sortedEntries = Array.from(names.entries()).sort(([idA], [idB]) =>
    idA.localeCompare(idB)
  );
  t.snapshot(sortedEntries);
});
// #endregion

View File

@@ -387,6 +387,52 @@ export class IndexerService {
await searchProvider.deleteByQuery(table, dsl, options);
}
/**
 * Look up the indexed names of blobs by their ids within one workspace.
 *
 * Searches the block table for `affine:attachment` blocks that belong to
 * `workspaceId` and whose `blob` field matches any of `blobIds`, then maps
 * each matched blob id to the block's `content` field.
 *
 * @param workspaceId - workspace whose blocks are searched
 * @param blobIds - blob ids to resolve; an empty list yields an empty map
 * @returns a map from blob id to the matched block's `content`. Blob ids
 * with no matching attachment block are absent. If several blocks share a
 * blob id, the one returned last by the search wins.
 */
async searchBlobNames(workspaceId: string, blobIds: string[]) {
  const blobNameMap = new Map<string, string>();

  // Nothing to look up. Returning early also avoids sending a `should`
  // group with zero clauses, whose meaning is provider-dependent
  // (NOTE(review): in Elasticsearch an empty bool query matches everything,
  // which would return every attachment in the workspace — confirm).
  if (blobIds.length === 0) {
    return blobNameMap;
  }

  const result = await this.search({
    table: SearchTable.block,
    query: {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.must,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'workspaceId',
          match: workspaceId,
        },
        {
          type: SearchQueryType.match,
          field: 'flavour',
          match: 'affine:attachment',
        },
        {
          // any one of the requested blob ids may match
          type: SearchQueryType.boolean,
          occur: SearchQueryOccur.should,
          queries: blobIds.map(blobId => ({
            type: SearchQueryType.match,
            field: 'blob',
            match: blobId,
          })),
        },
      ],
    },
    options: {
      fields: ['blob', 'content'],
      pagination: {
        // single page only; matches beyond this limit are not fetched
        limit: 10000,
      },
    },
  });

  for (const node of result.nodes) {
    // A node may come back without one of the requested fields; skip it
    // instead of throwing on `undefined[0]`.
    const blobId: unknown = node.fields.blob?.[0];
    const content: unknown = node.fields.content?.[0];
    if (
      typeof blobId === 'string' &&
      typeof content === 'string' &&
      blobId &&
      content
    ) {
      blobNameMap.set(blobId, content);
    }
  }
  return blobNameMap;
}
#formatSearchNodes(nodes: SearchNode[]) {
return nodes.map(node => ({
...node,