fix(server): return empty summary field value (#12517)

close AF-2658

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Tests**
  - Added new test cases and snapshots to enhance coverage for search results involving empty or missing fields like summary, title, and ref_doc_id.
  - Verified consistent handling of empty string values and absence of fields across different search providers.

- **Bug Fixes**
  - Improved handling of empty string values for specific fields by converting them to null to ensure consistent search result formatting.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2
2025-05-27 04:42:54 +00:00
parent 3676f3b769
commit 382c237dac
10 changed files with 459 additions and 1 deletions

View File

@@ -138,3 +138,35 @@ Generated by [AVA](https://avajs.dev).
},
},
}
## should return empty nodes when docId not exists
> Snapshot 1
{
workspace: {
search: {
nodes: [],
pagination: {
count: 0,
hasMore: false,
nextCursor: null,
},
},
},
}
## should empty doc summary string when doc exists but no summary
> Snapshot 1
[
{
fields: {
summary: [
'',
],
},
highlights: null,
},
]

View File

@@ -276,3 +276,87 @@ e2e('should return empty results when search not match any docs', async t => {
t.snapshot(result);
});
// Searching the doc table for a docId that this test never writes: the whole
// GraphQL response is pinned by snapshot.
e2e('should return empty nodes when docId not exists', async t => {
  const owner = await app.signup();
  const workspace = await app.create(Mockers.Workspace, { owner });

  // Exact match on the `docId` field.
  const docIdQuery = {
    type: SearchQueryType.match,
    field: 'docId',
    match: 'not-exists-doc-id',
  };
  // Only `summary` is requested, limited to one hit.
  const searchOptions = {
    fields: ['summary'],
    pagination: { limit: 1 },
  };
  const variables = {
    id: workspace.id,
    input: {
      table: SearchTable.doc,
      query: docIdQuery,
      options: searchOptions,
    },
  };

  const result = await app.gql({ query: indexerSearchQuery, variables });

  t.snapshot(result);
});
// Indexes a doc whose `summary` is the empty string, then searches for it by
// docId requesting only `summary`; the returned nodes are pinned by snapshot.
e2e(
  'should empty doc summary string when doc exists but no summary',
  async t => {
    const owner = await app.signup();
    const workspace = await app.create(Mockers.Workspace, { owner });
    const indexerService = app.get(IndexerService);

    const docId = 'doc-1-without-summary';
    const fixedTime = '2025-04-22T00:00:00.000Z';
    const docWithoutSummary = {
      docId,
      workspaceId: workspace.id,
      title: 'test1',
      summary: '',
      createdByUserId: owner.id,
      updatedByUserId: owner.id,
      createdAt: new Date(fixedTime),
      updatedAt: new Date(fixedTime),
    };
    // `refresh: true` is passed so the write is followed directly by the search below.
    await indexerService.write(SearchTable.doc, [docWithoutSummary], {
      refresh: true,
    });

    const variables = {
      id: workspace.id,
      input: {
        table: SearchTable.doc,
        query: {
          type: SearchQueryType.match,
          field: 'docId',
          match: docId,
        },
        options: {
          fields: ['summary'],
          pagination: { limit: 1 },
        },
      },
    };
    const result = await app.gql({ query: indexerSearchQuery, variables });

    // Only the nodes are snapshotted, not the full response.
    t.snapshot(result.workspace.search.nodes);
  }
);

View File

@@ -494,6 +494,55 @@ Generated by [AVA](https://avajs.dev).
},
}
## should return empty string field:summary value
> Snapshot 1
[
{
_id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch/doc0',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch',
},
fields: {
doc_id: [
'doc0',
],
summary: [
'',
],
title: [
'',
],
},
highlights: undefined,
},
]
## should not return not exists field:ref_doc_id
> Snapshot 1
[
{
_id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch/doc0/block0',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch',
},
fields: {
block_id: [
'block0',
],
doc_id: [
'doc0',
],
},
highlights: undefined,
},
]
## should aggregate query work
> Snapshot 1

View File

@@ -9,6 +9,9 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
block_id: [
'',
],
content: [
'hello world',
],
@@ -29,6 +32,9 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 2
{
block_id: [
'',
],
content: [
'hello world',
],
@@ -43,6 +49,9 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 3
{
block_id: [
'',
],
content: [
'hello world',
],
@@ -239,6 +248,9 @@ Generated by [AVA](https://avajs.dev).
blob: [
'blob1',
],
content: [
'',
],
flavour: [
'affine:page',
],
@@ -262,6 +274,9 @@ Generated by [AVA](https://avajs.dev).
'blob1',
'blob2',
],
content: [
'',
],
flavour: [
'affine:page',
],
@@ -284,6 +299,9 @@ Generated by [AVA](https://avajs.dev).
blob: [
'blob3',
],
content: [
'',
],
flavour: [
'affine:page',
],
@@ -687,6 +705,55 @@ Generated by [AVA](https://avajs.dev).
},
]
## should return empty string field:summary value
> Snapshot 1
[
{
_id: '274027293861775228',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-return-empty-string-field-summary-value-for-manticoresearch',
},
fields: {
doc_id: [
'doc0',
],
summary: [
'',
],
title: [
'',
],
},
highlights: undefined,
},
]
## should not return not exists field:ref_doc_id
> Snapshot 1
[
{
_id: '2457631367295327017',
_source: {
doc_id: 'doc0',
workspace_id: 'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-manticoresearch',
},
fields: {
block_id: [
'block0',
],
doc_id: [
'doc0',
],
},
highlights: undefined,
},
]
## should aggregate query return top score first
> Snapshot 1

View File

@@ -1303,6 +1303,113 @@ test('should search doc title support stemmer filter', async t => {
t.snapshot(omit(result.nodes[0], ['_score']));
});
// Writes a doc whose `title` and `summary` are both empty strings, then
// searches for it requesting those fields; the nodes (minus `_score`) are
// pinned by snapshot.
test('should return empty string field:summary value', async t => {
  const workspaceId =
    'workspaceId-search-query-return-empty-string-field-summary-value-for-elasticsearch';
  const docId = 'doc0';

  await searchProvider.write(
    SearchTable.doc,
    [
      {
        workspace_id: workspaceId,
        doc_id: docId,
        title: '',
        summary: '',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
    ],
    {
      refresh: true,
    }
  );

  const result = await searchProvider.search(SearchTable.doc, {
    _source: ['workspace_id', 'doc_id'],
    query: {
      bool: {
        // Both terms must match: the workspace and the single doc written above.
        must: [
          {
            term: { workspace_id: { value: workspaceId } },
          },
          {
            term: {
              doc_id: {
                value: docId,
              },
            },
          },
        ],
      },
    },
    fields: ['doc_id', 'title', 'summary'],
    sort: ['_score'],
  });

  // `_score` is dropped from every node before snapshotting.
  t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
});
// Writes a block without `ref_doc_id`, `parent_block_id`, `additional` or
// `parent_flavour`, then searches requesting those fields anyway; the nodes
// (minus `_score`) are pinned by snapshot.
test('should not return not exists field:ref_doc_id', async t => {
  const workspaceId =
    'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-elasticsearch';
  const docId = 'doc0';
  const blockId = 'block0';

  await searchProvider.write(
    SearchTable.block,
    [
      {
        workspace_id: workspaceId,
        doc_id: docId,
        block_id: blockId,
        content: 'hello world on search title blockId1-text',
        flavour: 'affine:text',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
    ],
    {
      refresh: true,
    }
  );

  const result = await searchProvider.search(SearchTable.block, {
    _source: ['workspace_id', 'doc_id'],
    query: {
      bool: {
        // Both terms must match: the workspace and the doc of the block written above.
        must: [
          {
            term: { workspace_id: { value: workspaceId } },
          },
          {
            term: {
              doc_id: {
                value: docId,
              },
            },
          },
        ],
      },
    },
    // The last four fields were not set on the block written above.
    fields: [
      'doc_id',
      'block_id',
      'ref_doc_id',
      'parent_block_id',
      'additional',
      'parent_flavour',
    ],
    sort: ['_score'],
  });

  // `_score` is dropped from every node before snapshotting.
  t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
});
// #endregion
// #region aggregate

View File

@@ -1065,6 +1065,113 @@ test('should search query match ref_doc_id work', async t => {
t.is(result.total, 2);
});
// Writes a doc whose `title` and `summary` are both empty strings, then
// searches for it requesting those fields; the nodes (minus `_score`) are
// pinned by snapshot.
test('should return empty string field:summary value', async t => {
  const workspaceId =
    'workspaceId-search-query-return-empty-string-field-summary-value-for-manticoresearch';
  const docId = 'doc0';

  await searchProvider.write(
    SearchTable.doc,
    [
      {
        workspace_id: workspaceId,
        doc_id: docId,
        title: '',
        summary: '',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
    ],
    {
      refresh: true,
    }
  );

  const result = await searchProvider.search(SearchTable.doc, {
    _source: ['workspace_id', 'doc_id'],
    query: {
      bool: {
        // Both terms must match: the workspace and the single doc written above.
        must: [
          {
            term: { workspace_id: { value: workspaceId } },
          },
          {
            term: {
              doc_id: {
                value: docId,
              },
            },
          },
        ],
      },
    },
    fields: ['doc_id', 'title', 'summary'],
    sort: ['_score'],
  });

  // `_score` is dropped from every node before snapshotting.
  t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
});
// Writes a block without `ref_doc_id`, `parent_block_id`, `additional` or
// `parent_flavour`, then searches requesting those fields anyway; the nodes
// (minus `_score`) are pinned by snapshot.
test('should not return not exists field:ref_doc_id', async t => {
  const workspaceId =
    'workspaceId-search-query-not-return-not-exists-field-ref_doc_id-for-manticoresearch';
  const docId = 'doc0';
  const blockId = 'block0';

  await searchProvider.write(
    SearchTable.block,
    [
      {
        workspace_id: workspaceId,
        doc_id: docId,
        block_id: blockId,
        content: 'hello world on search title blockId1-text',
        flavour: 'affine:text',
        created_by_user_id: user.id,
        updated_by_user_id: user.id,
        created_at: new Date(),
        updated_at: new Date(),
      },
    ],
    {
      refresh: true,
    }
  );

  const result = await searchProvider.search(SearchTable.block, {
    _source: ['workspace_id', 'doc_id'],
    query: {
      bool: {
        // Both terms must match: the workspace and the doc of the block written above.
        must: [
          {
            term: { workspace_id: { value: workspaceId } },
          },
          {
            term: {
              doc_id: {
                value: docId,
              },
            },
          },
        ],
      },
    },
    // The last four fields were not set on the block written above.
    fields: [
      'doc_id',
      'block_id',
      'ref_doc_id',
      'parent_block_id',
      'additional',
      'parent_flavour',
    ],
    sort: ['_score'],
  });

  // `_score` is dropped from every node before snapshotting.
  t.snapshot(result.nodes.map(node => omit(node, ['_score'])));
});
// #endregion
// #region aggregate

View File

@@ -38,6 +38,15 @@ const SupportIndexedAttributes = [
'parent_block_id',
];
/**
 * Names of the fields whose empty-string (`''`) value is replaced with `null`
 * while result fields are collected, before the null/undefined check is
 * applied. Fields not listed here keep an empty string as their value.
 */
const ConvertEmptyStringToNullValueFields = new Set([
  // these three are additionally special-cased as string[] by the field collector
  'ref_doc_id',
  'ref',
  'blob',
  // remaining fields covered by the conversion
  'additional',
  'parent_block_id',
  'parent_flavour',
]);
@Injectable()
export class ManticoresearchProvider extends ElasticsearchProvider {
override type = SearchProviderType.Manticoresearch;
@@ -344,7 +353,10 @@ export class ManticoresearchProvider extends ElasticsearchProvider {
return fields.reduce(
(acc, field) => {
let value = source[field];
if (value !== null && value !== undefined && value !== '') {
if (ConvertEmptyStringToNullValueFields.has(field) && value === '') {
value = null;
}
if (value !== null && value !== undefined) {
// special handle `ref_doc_id`, `ref`, `blob` as string[]
if (
(field === 'ref_doc_id' || field === 'ref' || field === 'blob') &&