mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 13:25:12 +00:00
fix(server): add stemmer filter (#12358)
CLOUD-214

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Improved search functionality to support stemming, so that queries for word variations (e.g., "window", "windows", "design") return documents containing related forms (e.g., "Windows", "designer").
- **Tests**
  - Added new tests to verify that search results correctly highlight and match stemmed word variations in document titles.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -423,6 +423,77 @@ Generated by [AVA](https://avajs.dev).
|
||||
},
|
||||
]
|
||||
|
||||
## should search doc title support stemmer filter
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
{
|
||||
_id: 'workspace-test-doc-title-stemmer-filter/doc-0',
|
||||
_source: {
|
||||
doc_id: 'doc-0',
|
||||
workspace_id: 'workspace-test-doc-title-stemmer-filter',
|
||||
},
|
||||
fields: {
|
||||
doc_id: [
|
||||
'doc-0',
|
||||
],
|
||||
title: [
|
||||
'Deploy on Windows by a designer',
|
||||
],
|
||||
},
|
||||
highlights: {
|
||||
title: [
|
||||
'Deploy on <b>Windows</b> by a designer',
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
> Snapshot 2
|
||||
|
||||
{
|
||||
_id: 'workspace-test-doc-title-stemmer-filter/doc-0',
|
||||
_source: {
|
||||
doc_id: 'doc-0',
|
||||
workspace_id: 'workspace-test-doc-title-stemmer-filter',
|
||||
},
|
||||
fields: {
|
||||
doc_id: [
|
||||
'doc-0',
|
||||
],
|
||||
title: [
|
||||
'Deploy on Windows by a designer',
|
||||
],
|
||||
},
|
||||
highlights: {
|
||||
title: [
|
||||
'Deploy on <b>Windows</b> by a designer',
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
> Snapshot 3
|
||||
|
||||
{
|
||||
_id: 'workspace-test-doc-title-stemmer-filter/doc-0',
|
||||
_source: {
|
||||
doc_id: 'doc-0',
|
||||
workspace_id: 'workspace-test-doc-title-stemmer-filter',
|
||||
},
|
||||
fields: {
|
||||
doc_id: [
|
||||
'doc-0',
|
||||
],
|
||||
title: [
|
||||
'Deploy on Windows by a designer',
|
||||
],
|
||||
},
|
||||
highlights: {
|
||||
title: [
|
||||
'Deploy on Windows by a <b>designer</b>',
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
## should aggregate query work
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
Binary file not shown.
@@ -1210,6 +1210,99 @@ test('should search query match ref_doc_id work', async t => {
|
||||
t.snapshot(result.nodes.map(node => pick(node, ['fields'])));
|
||||
});
|
||||
|
||||
test('should search doc title support stemmer filter', async t => {
|
||||
const docId = 'doc-0';
|
||||
const workspaceId = 'workspace-test-doc-title-stemmer-filter';
|
||||
await searchProvider.write(
|
||||
SearchTable.doc,
|
||||
[
|
||||
{
|
||||
workspace_id: workspaceId,
|
||||
doc_id: docId,
|
||||
title: 'Deploy on Windows by a designer',
|
||||
},
|
||||
],
|
||||
{
|
||||
refresh: true,
|
||||
}
|
||||
);
|
||||
|
||||
let result = await searchProvider.search(SearchTable.doc, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { title: 'window' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['doc_id', 'title'],
|
||||
highlight: {
|
||||
fields: {
|
||||
title: {
|
||||
pre_tags: ['<b>'],
|
||||
post_tags: ['</b>'],
|
||||
},
|
||||
},
|
||||
},
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.is(result.total, 1);
|
||||
t.snapshot(omit(result.nodes[0], ['_score']));
|
||||
|
||||
result = await searchProvider.search(SearchTable.doc, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { title: 'windows' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['doc_id', 'title'],
|
||||
highlight: {
|
||||
fields: {
|
||||
title: {
|
||||
pre_tags: ['<b>'],
|
||||
post_tags: ['</b>'],
|
||||
},
|
||||
},
|
||||
},
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.is(result.total, 1);
|
||||
t.snapshot(omit(result.nodes[0], ['_score']));
|
||||
|
||||
result = await searchProvider.search(SearchTable.doc, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { title: 'design' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['doc_id', 'title'],
|
||||
highlight: {
|
||||
fields: {
|
||||
title: {
|
||||
pre_tags: ['<b>'],
|
||||
post_tags: ['</b>'],
|
||||
},
|
||||
},
|
||||
},
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.is(result.total, 1);
|
||||
t.snapshot(omit(result.nodes[0], ['_score']));
|
||||
});
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region aggregate
|
||||
|
||||
@@ -31,7 +31,15 @@ export const blockMapping = {
|
||||
analyzer: {
|
||||
standard_with_cjk: {
|
||||
tokenizer: 'standard',
|
||||
filter: ['lowercase', 'cjk_bigram_and_unigrams'],
|
||||
filter: [
|
||||
'lowercase',
|
||||
'cjk_bigram_and_unigrams',
|
||||
// keyword_repeat + stemmer + remove_duplicates keep each original token alongside its stem, e.g. `windows designer` => `windows`, `window`, `designer`, `design`
|
||||
// @see https://www.elastic.co/docs/reference/text-analysis/analysis-remove-duplicates-tokenfilter
|
||||
'keyword_repeat',
|
||||
'stemmer',
|
||||
'remove_duplicates',
|
||||
],
|
||||
},
|
||||
autocomplete: {
|
||||
tokenizer: 'autocomplete_tokenizer',
|
||||
|
||||
@@ -24,7 +24,13 @@ export const docMapping = {
|
||||
analyzer: {
|
||||
standard_with_cjk: {
|
||||
tokenizer: 'standard',
|
||||
filter: ['lowercase', 'cjk_bigram_and_unigrams'],
|
||||
filter: [
|
||||
'lowercase',
|
||||
'cjk_bigram_and_unigrams',
|
||||
'keyword_repeat',
|
||||
'stemmer',
|
||||
'remove_duplicates',
|
||||
],
|
||||
},
|
||||
autocomplete: {
|
||||
tokenizer: 'autocomplete_tokenizer',
|
||||
|
||||
Reference in New Issue
Block a user