diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md index e975fdabdd..917580376a 100644 --- a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md +++ b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.md @@ -138,3 +138,35 @@ Generated by [AVA](https://avajs.dev). }, }, } + +## should return empty nodes when docId not exists + +> Snapshot 1 + + { + workspace: { + search: { + nodes: [], + pagination: { + count: 0, + hasMore: false, + nextCursor: null, + }, + }, + }, + } + +## should empty doc summary string when doc exists but no summary + +> Snapshot 1 + + [ + { + fields: { + summary: [ + '', + ], + }, + highlights: null, + }, + ] diff --git a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap index ec5c80acce..180402967c 100644 Binary files a/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap and b/packages/backend/server/src/__tests__/e2e/indexer/__snapshots__/search.spec.ts.snap differ diff --git a/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts index ab136518a7..5bce9f5240 100644 --- a/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts +++ b/packages/backend/server/src/__tests__/e2e/indexer/search.spec.ts @@ -276,3 +276,87 @@ e2e('should return empty results when search not match any docs', async t => { t.snapshot(result); }); + +e2e('should return empty nodes when docId not exists', async t => { + const owner = await app.signup(); + const workspace = await app.create(Mockers.Workspace, { + owner, + }); + + const result = await app.gql({ + query: indexerSearchQuery, + variables: { + id: workspace.id, + input: { + table: SearchTable.doc, + query: { + type: SearchQueryType.match, + field: 'docId', + match: 'not-exists-doc-id', + }, + options: { + fields: ['summary'], + pagination: { + limit: 1, + }, + }, + }, + }, + }); + + t.snapshot(result); +}); + +e2e( + 'should empty doc summary string when doc exists but no summary', + async t => { + const owner = await app.signup(); + const workspace = await app.create(Mockers.Workspace, { + owner, + }); + + const indexerService = app.get(IndexerService); + + await indexerService.write( + SearchTable.doc, + [ + { + docId: 'doc-1-without-summary', + workspaceId: workspace.id, + title: 'test1', + summary: '', + createdByUserId: owner.id, + updatedByUserId: owner.id, + createdAt: new Date('2025-04-22T00:00:00.000Z'), + updatedAt: new Date('2025-04-22T00:00:00.000Z'), + }, + ], + { + refresh: true, + } + ); + + const result = await app.gql({ + query: indexerSearchQuery, + variables: { + id: workspace.id, + input: { + table: SearchTable.doc, + query: { + type: SearchQueryType.match, + field: 'docId', + match: 'doc-1-without-summary', + }, + options: { + fields: ['summary'], + pagination: { + limit: 1, + }, + }, + }, + }, + }); + + t.snapshot(result.workspace.search.nodes); + } +); diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md index d1c77e73b9..ea75f1a483 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md @@ -494,6 +494,55 @@ Generated by [AVA](https://avajs.dev). }, } +## should return empty string field:summary value + +> Snapshot 1 + + [ + { + _id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch/doc0', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch', + }, + fields: { + doc_id: [ + 'doc0', + ], + summary: [ + '', + ], + title: [ + '', + ], + }, + highlights: undefined, + }, + ] + +## should not return not exists field:ref_doc_id + +> Snapshot 1 + + [ + { + _id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch/doc0/block0', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch', + }, + fields: { + block_id: [ + 'block0', + ], + doc_id: [ + 'doc0', + ], + }, + highlights: undefined, + }, + ] + ## should aggregate query work > Snapshot 1 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap index 97ef5b13df..a98a9c1b9a 100644 Binary files a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap and b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap differ diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md index ff0410b1e2..f7a619b17d 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md @@ -9,6 +9,9 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 { + block_id: [ + '', + ], content: [ 'hello world', ], @@ -29,6 +32,9 @@ Generated by [AVA](https://avajs.dev). > Snapshot 2 { + block_id: [ + '', + ], content: [ 'hello world', ], @@ -43,6 +49,9 @@ Generated by [AVA](https://avajs.dev). > Snapshot 3 { + block_id: [ + '', + ], content: [ 'hello world', ], @@ -239,6 +248,9 @@ Generated by [AVA](https://avajs.dev). blob: [ 'blob1', ], + content: [ + '', + ], flavour: [ 'affine:page', ], @@ -262,6 +274,9 @@ Generated by [AVA](https://avajs.dev). 'blob1', 'blob2', ], + content: [ + '', + ], flavour: [ 'affine:page', ], @@ -284,6 +299,9 @@ Generated by [AVA](https://avajs.dev). blob: [ 'blob3', ], + content: [ + '', + ], flavour: [ 'affine:page', ], @@ -687,6 +705,55 @@ Generated by [AVA](https://avajs.dev). }, ] +## should return empty string field:summary value + +> Snapshot 1 + + [ + { + _id: '274027293861775228', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-manticoresearch', + }, + fields: { + doc_id: [ + 'doc0', + ], + summary: [ + '', + ], + title: [ + '', + ], + }, + highlights: undefined, + }, + ] + +## should not return not exists field:ref_doc_id + +> Snapshot 1 + + [ + { + _id: '2457631367295327017', + _source: { + doc_id: 'doc0', + workspace_id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-manticoresearch', + }, + fields: { + block_id: [ + 'block0', + ], + doc_id: [ + 'doc0', + ], + }, + highlights: undefined, + }, + ] + ## should aggregate query return top score first > Snapshot 1 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap index 3a14e99615..ee2736d984 100644 Binary files a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap and b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap differ diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts index 0d1584f029..3f39b499c2 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts @@ -1303,6 +1303,113 @@ test('should search doc title support stemmer filter', async t => { t.snapshot(omit(result.nodes[0], ['_score'])); }); +test('should return empty string field:summary value', async t => { + const workspaceId = + 'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch'; + const docId = 'doc0'; + + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: docId, + title: '', + summary: '', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspaceId } }, + }, + { + term: { + doc_id: { + value: docId, + }, + }, + }, + ], + }, + }, + fields: ['doc_id', 'title', 'summary'], + sort: ['_score'], + }); + + t.snapshot(result.nodes.map(node => omit(node, ['_score']))); +}); + +test('should not return not exists field:ref_doc_id', async t => { + const workspaceId = + 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch'; + const docId = 'doc0'; + const blockId = 'block0'; + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspaceId, + doc_id: docId, + block_id: blockId, + content: 'hello world on search title blockId1-text', + flavour: 'affine:text', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspaceId } }, + }, + { + term: { + doc_id: { + value: docId, + }, + }, + }, + ], + }, + }, + fields: [ + 'doc_id', + 'block_id', + 'ref_doc_id', + 'parent_block_id', + 'additional', + 'parent_flavour', + ], + sort: ['_score'], + }); + + t.snapshot(result.nodes.map(node => omit(node, ['_score']))); +}); + // #endregion // #region aggregate diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts index e502aae96e..6ecf13b5b0 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts @@ -1065,6 +1065,113 @@ test('should search query match ref_doc_id work', async t => { t.is(result.total, 2); }); +test('should return empty string field:summary value', async t => { + const workspaceId = + 'workspaceId-search-query-return-empty-string-field-summary-value-for-manticoresearch'; + const docId = 'doc0'; + + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: docId, + title: '', + summary: '', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspaceId } }, + }, + { + term: { + doc_id: { + value: docId, + }, + }, + }, + ], + }, + }, + fields: ['doc_id', 'title', 'summary'], + sort: ['_score'], + }); + + t.snapshot(result.nodes.map(node => omit(node, ['_score']))); +}); + +test('should not return not exists field:ref_doc_id', async t => { + const workspaceId = + 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-manticoresearch'; + const docId = 'doc0'; + const blockId = 'block0'; + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspaceId, + doc_id: docId, + block_id: blockId, + content: 'hello world on search title blockId1-text', + flavour: 'affine:text', + created_by_user_id: user.id, + updated_by_user_id: user.id, + created_at: new Date(), + updated_at: new Date(), + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { + term: { workspace_id: { value: workspaceId } }, + }, + { + term: { + doc_id: { + value: docId, + }, + }, + }, + ], + }, + }, + fields: [ + 'doc_id', + 'block_id', + 'ref_doc_id', + 'parent_block_id', + 'additional', + 'parent_flavour', + ], + sort: ['_score'], + }); + + t.snapshot(result.nodes.map(node => omit(node, ['_score']))); +}); + // #endregion // #region aggregate diff --git a/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts index 47e8698384..9eeb1432bc 100644 --- a/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts +++ b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts @@ -38,6 +38,15 @@ const SupportIndexedAttributes = [ 'parent_block_id', ]; +const ConvertEmptyStringToNullValueFields = new Set([ + 'ref_doc_id', + 'ref', + 'blob', + 'additional', + 'parent_block_id', + 'parent_flavour', +]); + @Injectable() export class ManticoresearchProvider extends ElasticsearchProvider { override type = SearchProviderType.Manticoresearch; @@ -344,7 +353,10 @@ export class ManticoresearchProvider extends ElasticsearchProvider { return fields.reduce( (acc, field) => { let value = source[field]; - if (value !== null && value !== undefined && value !== '') { + if (ConvertEmptyStringToNullValueFields.has(field) && value === '') { + value = null; + } + if (value !== null && value !== undefined) { // special handle `ref_doc_id`, `ref`, `blob` as string[] if ( (field === 'ref_doc_id' || field === 'ref' || field === 'blob') &&