/**
 * Resolves the public profile of every user referenced by `items`.
 *
 * Items whose `userId` is empty are ignored, and an id that occurs several
 * times is looked up only once.
 *
 * @param items - objects carrying a `userId`
 * @returns a map from user id to the public user returned by
 *   `getPublicUsers`; ids for which nothing is returned have no entry
 */
async getPublicUsersMap<T extends ItemWithUserId>(
  items: T[]
): Promise<Map<string, PublicUser>> {
  // A Set collapses repeated ids (e.g. the same user referenced by many items)
  // so that each id is passed to the lookup at most once.
  const uniqueUserIds = new Set<string>();
  for (const item of items) {
    if (item.userId) {
      uniqueUserIds.add(item.userId);
    }
  }

  // Nothing to resolve: skip the lookup entirely.
  if (uniqueUserIds.size === 0) {
    return new Map();
  }

  const users = await this.getPublicUsers(Array.from(uniqueUserIds));
  return new Map(users.map(user => [user.id, user]));
}
// #region searchDocsByKeyword()

// Indexes four blocks (one per doc) that all contain "hello" and checks that
// searchDocsByKeyword() returns one row per doc. docId1 is hit on its
// `affine:page` block, docId2/docId3 have a DocMeta row, docId4 has none.
test('should search docs by keyword work', async t => {
  const workspaceId = workspace.id;
  const [docId1, docId2, docId3, docId4] = [
    randomUUID(),
    randomUUID(),
    randomUUID(),
    randomUUID(),
  ];

  // Only the first three docs get a DocMeta row.
  const docMetas = [
    { docId: docId1, title: 'hello world 1' },
    { docId: docId2, title: 'hello world 2' },
    { docId: docId3, title: 'hello world 3' },
  ];
  for (const docMeta of docMetas) {
    await module.create(Mockers.DocMeta, { workspaceId, ...docMeta });
  }

  // Every block is created and updated by the same user at the same instant.
  const authoredAt = (iso: string) => ({
    createdByUserId: user.id,
    updatedByUserId: user.id,
    createdAt: new Date(iso),
    updatedAt: new Date(iso),
  });

  await indexerService.write(
    SearchTable.block,
    [
      {
        workspaceId,
        docId: docId1,
        blockId: 'block1',
        content: 'hello world',
        flavour: 'affine:page',
        ...authoredAt('2025-06-20T00:00:00.000Z'),
      },
      {
        workspaceId,
        docId: docId2,
        blockId: 'block2',
        content: 'hello world 2',
        flavour: 'affine:text',
        ...authoredAt('2025-06-20T00:00:01.000Z'),
      },
      {
        workspaceId,
        docId: docId3,
        blockId: 'block3',
        content: 'hello world 3',
        flavour: 'affine:text',
        ...authoredAt('2025-06-20T00:00:02.000Z'),
      },
      {
        workspaceId,
        docId: docId4,
        blockId: 'block4',
        content: 'hello world 4',
        flavour: 'affine:text',
        ...authoredAt('2025-06-20T00:00:03.000Z'),
      },
    ],
    {
      refresh: true,
    }
  );

  const rows = await indexerService.searchDocsByKeyword(workspaceId, 'hello');

  t.is(rows.length, 4);

  // Drop the fields that differ between runs (random doc ids, user data) and
  // order by block id so the snapshot is stable.
  const comparableRows = rows.map(row =>
    omit(row, [
      'docId',
      'createdByUserId',
      'updatedByUserId',
      'createdByUser',
      'updatedByUser',
    ])
  );
  comparableRows.sort((a, b) => a.blockId.localeCompare(b.blockId));
  t.snapshot(comparableRows);
});

// #endregion
/**
 * Searches the block index of a workspace for `keyword` and returns one
 * result per matching doc.
 *
 * Matching blocks are aggregated by `docId`; the first hit of each bucket
 * supplies the block id, highlight, timestamps and author ids of the result.
 * When that hit is an `affine:page` block its content is used as the doc
 * title; otherwise the title is looked up via `models.doc.findMetas` and
 * falls back to `''`. Creator and last editor are then resolved through
 * `models.user.getPublicUsersMap`.
 *
 * @param workspaceId - workspace whose blocks are searched
 * @param keyword - text matched against the block `content` field
 * @param options - `limit` caps the number of doc buckets (default 20)
 * @returns the matching docs, in the bucket order returned by `aggregate`
 */
async searchDocsByKeyword(
  workspaceId: string,
  keyword: string,
  options?: {
    limit?: number;
  }
): Promise<SearchDoc[]> {
  const limit = options?.limit ?? 20;
  const result = await this.aggregate({
    table: SearchTable.block,
    field: 'docId',
    query: {
      type: SearchQueryType.boolean,
      occur: SearchQueryOccur.must,
      queries: [
        {
          type: SearchQueryType.match,
          field: 'workspaceId',
          match: workspaceId,
        },
        {
          type: SearchQueryType.boolean,
          occur: SearchQueryOccur.must,
          queries: [
            {
              type: SearchQueryType.match,
              field: 'content',
              match: keyword,
            },
            {
              // Optional clauses: the content match is repeated next to a
              // 1.5x boost for `affine:page` blocks.
              type: SearchQueryType.boolean,
              occur: SearchQueryOccur.should,
              queries: [
                {
                  type: SearchQueryType.match,
                  field: 'content',
                  match: keyword,
                },
                {
                  type: SearchQueryType.boost,
                  boost: 1.5,
                  query: {
                    type: SearchQueryType.match,
                    field: 'flavour',
                    match: 'affine:page',
                  },
                },
              ],
            },
          ],
        },
      ],
    },
    options: {
      hits: {
        fields: [
          'blockId',
          'flavour',
          'content',
          'createdAt',
          'updatedAt',
          'createdByUserId',
          'updatedByUserId',
        ],
        highlights: [
          {
            field: 'content',
            // NOTE(review): both markers are empty strings here, so matched
            // fragments are not wrapped in anything — confirm this is intended.
            before: '',
            end: '',
          },
        ],
        // NOTE(review): two hits are requested per doc although only the
        // first one is read below — confirm whether 1 would suffice.
        pagination: {
          limit: 2,
        },
      },
      pagination: {
        limit,
      },
    },
  });

  const docs: SearchDoc[] = [];
  const missingTitles: { workspaceId: string; docId: string }[] = [];
  // A Set so that a user who authored many hits is resolved only once.
  const userIds = new Set<string>();

  for (const bucket of result.buckets) {
    // Only the first hit of a bucket is used. A bucket that came back without
    // any hit is skipped instead of aborting the whole search with a TypeError.
    const firstHit = bucket.hits.nodes[0];
    if (!firstHit) {
      continue;
    }

    const { fields } = firstHit;
    const docId = bucket.key;
    const blockId = fields.blockId[0] as string;
    const flavour = fields.flavour[0] as string;
    const content = fields.content[0] as string;
    const createdAt = fields.createdAt[0] as Date;
    const updatedAt = fields.updatedAt[0] as Date;
    const createdByUserId = fields.createdByUserId[0] as string;
    const updatedByUserId = fields.updatedByUserId[0] as string;
    const highlight = firstHit.highlights?.content?.[0] as string;
    let title = '';

    if (flavour === 'affine:page') {
      // hit title block
      title = content;
    } else {
      // hit content block, missing title
      missingTitles.push({ workspaceId, docId });
    }

    docs.push({
      docId,
      blockId,
      title,
      highlight,
      createdAt,
      updatedAt,
      createdByUserId,
      updatedByUserId,
    });

    if (createdByUserId) {
      userIds.add(createdByUserId);
    }
    if (updatedByUserId) {
      userIds.add(updatedByUserId);
    }
  }

  if (missingTitles.length > 0) {
    const metas = await this.models.doc.findMetas(missingTitles, {
      select: {
        title: true,
      },
    });
    const titleMap = new Map<string, string>();
    for (const meta of metas) {
      if (meta?.title) {
        titleMap.set(meta.docId, meta.title);
      }
    }
    // Fill in every result that still has no title; '' when no meta has one.
    for (const doc of docs) {
      if (!doc.title) {
        doc.title = titleMap.get(doc.docId) ?? '';
      }
    }
  }

  const userMap = await this.models.user.getPublicUsersMap(
    Array.from(userIds, userId => ({ userId }))
  );

  for (const doc of docs) {
    doc.createdByUser = userMap.get(doc.createdByUserId);
    doc.updatedByUser = userMap.get(doc.updatedByUserId);
  }

  return docs;
}