diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md index 1158449094..d1c77e73b9 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.md @@ -423,6 +423,77 @@ Generated by [AVA](https://avajs.dev). }, ] +## should search doc title support stemmer filter + +> Snapshot 1 + + { + _id: 'workspace-test-doc-title-stemmer-filter/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-stemmer-filter', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: [ + 'Deploy on Windows by a designer', + ], + }, + highlights: { + title: [ + 'Deploy on Windows by a designer', + ], + }, + } + +> Snapshot 2 + + { + _id: 'workspace-test-doc-title-stemmer-filter/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-stemmer-filter', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: [ + 'Deploy on Windows by a designer', + ], + }, + highlights: { + title: [ + 'Deploy on Windows by a designer', + ], + }, + } + +> Snapshot 3 + + { + _id: 'workspace-test-doc-title-stemmer-filter/doc-0', + _source: { + doc_id: 'doc-0', + workspace_id: 'workspace-test-doc-title-stemmer-filter', + }, + fields: { + doc_id: [ + 'doc-0', + ], + title: [ + 'Deploy on Windows by a designer', + ], + }, + highlights: { + title: [ + 'Deploy on Windows by a designer', + ], + }, + } + ## should aggregate query work > Snapshot 1 diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap index 48600af4da..97ef5b13df 100644 Binary files a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap and b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/elasticsearch.spec.ts.snap differ diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts index e4968e6be3..0d1584f029 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/elasticsearch.spec.ts @@ -1210,6 +1210,99 @@ test('should search query match ref_doc_id work', async t => { t.snapshot(result.nodes.map(node => pick(node, ['fields']))); }); +test('should search doc title support stemmer filter', async t => { + const docId = 'doc-0'; + const workspaceId = 'workspace-test-doc-title-stemmer-filter'; + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: docId, + title: 'Deploy on Windows by a designer', + }, + ], + { + refresh: true, + } + ); + + let result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { title: 'window' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + title: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); + + result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { title: 'windows' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + title: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); + + result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { match: { workspace_id: workspaceId } }, + { match: { title: 'design' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + highlight: { + fields: { + title: { + pre_tags: [''], + post_tags: [''], + }, + }, + }, + sort: ['_score'], + }); + + t.is(result.total, 1); + t.snapshot(omit(result.nodes[0], ['_score'])); +}); + // #endregion // #region aggregate diff --git a/packages/backend/server/src/plugins/indexer/tables/block.ts b/packages/backend/server/src/plugins/indexer/tables/block.ts index 9261477553..828d8094ac 100644 --- a/packages/backend/server/src/plugins/indexer/tables/block.ts +++ b/packages/backend/server/src/plugins/indexer/tables/block.ts @@ -31,7 +31,15 @@ export const blockMapping = { analyzer: { standard_with_cjk: { tokenizer: 'standard', - filter: ['lowercase', 'cjk_bigram_and_unigrams'], + filter: [ + 'lowercase', + 'cjk_bigram_and_unigrams', + // support `windows designer` => `windows`, `window`, `designer`, `design` + // @see https://www.elastic.co/docs/reference/text-analysis/analysis-remove-duplicates-tokenfilter + 'keyword_repeat', + 'stemmer', + 'remove_duplicates', + ], }, autocomplete: { tokenizer: 'autocomplete_tokenizer', diff --git a/packages/backend/server/src/plugins/indexer/tables/doc.ts b/packages/backend/server/src/plugins/indexer/tables/doc.ts index 381575be31..1647fad7e0 100644 --- a/packages/backend/server/src/plugins/indexer/tables/doc.ts +++ b/packages/backend/server/src/plugins/indexer/tables/doc.ts @@ -24,7 +24,13 @@ export const docMapping = { analyzer: { standard_with_cjk: { tokenizer: 'standard', - filter: ['lowercase', 'cjk_bigram_and_unigrams'], + filter: [ + 'lowercase', + 'cjk_bigram_and_unigrams', + 'keyword_repeat', + 'stemmer', + 'remove_duplicates', + ], }, autocomplete: { tokenizer: 'autocomplete_tokenizer',