mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-03-22 23:30:36 +08:00
feat(server): improve indexer (#14698)
fix #13862

#### PR Dependency Tree

* **PR #14698** 👈

This tree was auto-generated by [Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Enhanced search support for Chinese, Japanese, and Korean languages with improved text segmentation and character matching.
  * Added index management capabilities with table recreation functionality.
* **Bug Fixes**
  * Improved search accuracy for non-Latin scripts through updated morphology and n-gram configuration.
* **Chores**
  * Added database migration for search index optimization.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
import { ModuleRef } from '@nestjs/core';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
import { IndexerService } from '../../plugins/indexer';
|
||||
|
||||
/**
 * Data migration that triggers a rebuild of the Manticore search indexes
 * by delegating to `IndexerService.rebuildManticoreIndexes()`.
 */
export class RebuildManticoreMixedScriptIndexes1763800000000 {
|
||||
// Forward step: the Prisma client argument is unused; the IndexerService is
// resolved from the module ref with a non-strict lookup and the rebuild is awaited.
static async up(_db: PrismaClient, ref: ModuleRef) {
|
||||
await ref.get(IndexerService, { strict: false }).rebuildManticoreIndexes();
|
||||
}
|
||||
|
||||
// Rollback step: no-op, nothing is undone.
static async down(_db: PrismaClient) {}
|
||||
}
|
||||
@@ -3,3 +3,4 @@ export * from './1703756315970-unamed-account';
|
||||
export * from './1721299086340-refresh-unnamed-user';
|
||||
export * from './1745211351719-create-indexer-tables';
|
||||
export * from './1751966744168-correct-session-update-time';
|
||||
export * from './1763800000000-rebuild-manticore-mixed-script-indexes';
|
||||
|
||||
@@ -4,6 +4,75 @@ The actual snapshot is saved in `manticoresearch.spec.ts.snap`.
|
||||
|
||||
Generated by [AVA](https://avajs.dev).
|
||||
|
||||
## should search doc title match chinese word segmentation
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
[
|
||||
{
|
||||
_id: '5373363211628325828',
|
||||
_source: {
|
||||
doc_id: 'doc-chinese',
|
||||
workspace_id: 'workspace-test-doc-title-chinese',
|
||||
},
|
||||
fields: {
|
||||
doc_id: [
|
||||
'doc-chinese',
|
||||
],
|
||||
title: [
|
||||
'AFFiNE 是一个基于云端的笔记应用',
|
||||
],
|
||||
},
|
||||
highlights: undefined,
|
||||
},
|
||||
]
|
||||
|
||||
## should search block content match korean ngram
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
[
|
||||
{
|
||||
_id: '1227635764506850985',
|
||||
_source: {
|
||||
doc_id: 'doc-korean',
|
||||
workspace_id: 'workspace-test-block-content-korean',
|
||||
},
|
||||
fields: {
|
||||
block_id: [
|
||||
'block-korean',
|
||||
],
|
||||
content: [
|
||||
'다람쥐 헌 쳇바퀴에 타고파',
|
||||
],
|
||||
},
|
||||
highlights: undefined,
|
||||
},
|
||||
]
|
||||
|
||||
## should search block content match japanese kana ngram
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
[
|
||||
{
|
||||
_id: '381498385699454292',
|
||||
_source: {
|
||||
doc_id: 'doc-japanese',
|
||||
workspace_id: 'workspace-test-block-content-japanese',
|
||||
},
|
||||
fields: {
|
||||
block_id: [
|
||||
'block-japanese',
|
||||
],
|
||||
content: [
|
||||
'いろはにほへと ちりぬるを',
|
||||
],
|
||||
},
|
||||
highlights: undefined,
|
||||
},
|
||||
]
|
||||
|
||||
## should write document work
|
||||
|
||||
> Snapshot 1
|
||||
@@ -889,7 +958,7 @@ Generated by [AVA](https://avajs.dev).
|
||||
> Snapshot 1
|
||||
|
||||
{
|
||||
term: {
|
||||
equals: {
|
||||
workspace_id: 'workspaceId1',
|
||||
},
|
||||
}
|
||||
@@ -897,7 +966,7 @@ Generated by [AVA](https://avajs.dev).
|
||||
> Snapshot 2
|
||||
|
||||
{
|
||||
term: {
|
||||
equals: {
|
||||
workspace_id: 'workspaceId1',
|
||||
},
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -33,8 +33,8 @@ const user = await module.create(Mockers.User);
|
||||
const workspace = await module.create(Mockers.Workspace);
|
||||
|
||||
test.before(async () => {
|
||||
await searchProvider.createTable(SearchTable.block, blockSQL);
|
||||
await searchProvider.createTable(SearchTable.doc, docSQL);
|
||||
await searchProvider.recreateTable(SearchTable.block, blockSQL);
|
||||
await searchProvider.recreateTable(SearchTable.doc, docSQL);
|
||||
|
||||
await searchProvider.write(
|
||||
SearchTable.block,
|
||||
@@ -163,6 +163,135 @@ test('should provider is manticoresearch', t => {
|
||||
t.is(searchProvider.type, SearchProviderType.Manticoresearch);
|
||||
});
|
||||
|
||||
// Writes one doc with a Chinese title, then checks that a full-text match on a
// two-character word taken from that title ('笔记') finds it.
test('should search doc title match chinese word segmentation', async t => {
|
||||
const workspaceId = 'workspace-test-doc-title-chinese';
|
||||
const docId = 'doc-chinese';
|
||||
const title = 'AFFiNE 是一个基于云端的笔记应用';
|
||||
|
||||
await searchProvider.write(
|
||||
SearchTable.doc,
|
||||
[
|
||||
{
|
||||
workspace_id: workspaceId,
|
||||
doc_id: docId,
|
||||
title,
|
||||
},
|
||||
],
|
||||
{
|
||||
// presumably makes the write visible to the search below — TODO confirm
refresh: true,
|
||||
}
|
||||
);
|
||||
|
||||
const result = await searchProvider.search(SearchTable.doc, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
// Restrict the search to this test's own workspace id.
{ term: { workspace_id: { value: workspaceId } } },
|
||||
// '笔记' is a substring of the title written above.
{ match: { title: '笔记' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['doc_id', 'title'],
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.true(result.total >= 1);
|
||||
// Snapshot only the node for this doc, with `_score` removed from each node.
t.snapshot(
|
||||
result.nodes
|
||||
.filter(node => node._source.doc_id === docId)
|
||||
.map(node => omit(node, ['_score']))
|
||||
);
|
||||
});
|
||||
|
||||
// Writes one paragraph block with Korean content, then checks that a match on a
// single Hangul syllable from that content ('쥐') finds it.
test('should search block content match korean ngram', async t => {
|
||||
const workspaceId = 'workspace-test-block-content-korean';
|
||||
const docId = 'doc-korean';
|
||||
const blockId = 'block-korean';
|
||||
const content = '다람쥐 헌 쳇바퀴에 타고파';
|
||||
|
||||
await searchProvider.write(
|
||||
SearchTable.block,
|
||||
[
|
||||
{
|
||||
workspace_id: workspaceId,
|
||||
doc_id: docId,
|
||||
block_id: blockId,
|
||||
content,
|
||||
flavour: 'affine:paragraph',
|
||||
},
|
||||
],
|
||||
{
|
||||
// presumably makes the write visible to the search below — TODO confirm
refresh: true,
|
||||
}
|
||||
);
|
||||
|
||||
const result = await searchProvider.search(SearchTable.block, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
// Restrict the search to this test's own workspace id.
{ term: { workspace_id: { value: workspaceId } } },
|
||||
// '쥐' is the last syllable of the first word in the content written above.
{ match: { content: '쥐' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['block_id', 'content'],
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.true(result.total >= 1);
|
||||
// Snapshot only the node for this block, with `_score` removed from each node.
t.snapshot(
|
||||
result.nodes
|
||||
.filter(node => node.fields.block_id?.[0] === blockId)
|
||||
.map(node => omit(node, ['_score']))
|
||||
);
|
||||
});
|
||||
|
||||
// Writes one paragraph block with hiragana content, then checks that a match on
// a single kana from that content ('へ') finds it.
test('should search block content match japanese kana ngram', async t => {
|
||||
const workspaceId = 'workspace-test-block-content-japanese';
|
||||
const docId = 'doc-japanese';
|
||||
const blockId = 'block-japanese';
|
||||
const content = 'いろはにほへと ちりぬるを';
|
||||
|
||||
await searchProvider.write(
|
||||
SearchTable.block,
|
||||
[
|
||||
{
|
||||
workspace_id: workspaceId,
|
||||
doc_id: docId,
|
||||
block_id: blockId,
|
||||
content,
|
||||
flavour: 'affine:paragraph',
|
||||
},
|
||||
],
|
||||
{
|
||||
// presumably makes the write visible to the search below — TODO confirm
refresh: true,
|
||||
}
|
||||
);
|
||||
|
||||
const result = await searchProvider.search(SearchTable.block, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
// Restrict the search to this test's own workspace id.
{ term: { workspace_id: { value: workspaceId } } },
|
||||
// 'へ' is a single kana that occurs in the content written above.
{ match: { content: 'へ' } },
|
||||
],
|
||||
},
|
||||
},
|
||||
fields: ['block_id', 'content'],
|
||||
sort: ['_score'],
|
||||
});
|
||||
|
||||
t.true(result.total >= 1);
|
||||
// Snapshot only the node for this block, with `_score` removed from each node.
t.snapshot(
|
||||
result.nodes
|
||||
.filter(node => node.fields.block_id?.[0] === blockId)
|
||||
.map(node => omit(node, ['_score']))
|
||||
);
|
||||
});
|
||||
|
||||
// #region write
|
||||
|
||||
test('should write document work', async t => {
|
||||
@@ -189,7 +318,7 @@ test('should write document work', async t => {
|
||||
|
||||
let result = await searchProvider.search(SearchTable.block, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: { match: { doc_id: docId } },
|
||||
query: { term: { doc_id: { value: docId } } },
|
||||
fields: [
|
||||
'flavour',
|
||||
'flavour_indexed',
|
||||
@@ -232,7 +361,7 @@ test('should write document work', async t => {
|
||||
|
||||
result = await searchProvider.search(SearchTable.block, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: { match: { doc_id: docId } },
|
||||
query: { term: { doc_id: { value: docId } } },
|
||||
fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
|
||||
sort: ['_score'],
|
||||
});
|
||||
@@ -263,7 +392,7 @@ test('should write document work', async t => {
|
||||
|
||||
result = await searchProvider.search(SearchTable.block, {
|
||||
_source: ['workspace_id', 'doc_id'],
|
||||
query: { match: { doc_id: docId } },
|
||||
query: { term: { doc_id: { value: docId } } },
|
||||
fields: ['flavour', 'block_id', 'content', 'ref_doc_id'],
|
||||
sort: ['_score'],
|
||||
});
|
||||
@@ -319,8 +448,8 @@ test('should handle ref_doc_id as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -371,8 +500,8 @@ test('should handle ref_doc_id as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -416,8 +545,8 @@ test('should handle content as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -455,8 +584,8 @@ test('should handle content as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -497,8 +626,8 @@ test('should handle blob as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -534,8 +663,8 @@ test('should handle blob as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -571,8 +700,8 @@ test('should handle blob as string[]', async t => {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { workspace_id: workspaceId } },
|
||||
{ match: { doc_id: docId } },
|
||||
{ term: { workspace_id: { value: workspaceId } } },
|
||||
{ term: { doc_id: { value: docId } } },
|
||||
],
|
||||
},
|
||||
},
|
||||
@@ -682,8 +811,10 @@ test('should search query all and get next cursor work', async t => {
|
||||
'id',
|
||||
],
|
||||
query: {
|
||||
match: {
|
||||
workspace_id: workspaceId,
|
||||
term: {
|
||||
workspace_id: {
|
||||
value: workspaceId,
|
||||
},
|
||||
},
|
||||
},
|
||||
fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
|
||||
@@ -708,8 +839,10 @@ test('should search query all and get next cursor work', async t => {
|
||||
'id',
|
||||
],
|
||||
query: {
|
||||
match: {
|
||||
workspace_id: workspaceId,
|
||||
term: {
|
||||
workspace_id: {
|
||||
value: workspaceId,
|
||||
},
|
||||
},
|
||||
},
|
||||
fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
|
||||
@@ -734,8 +867,10 @@ test('should search query all and get next cursor work', async t => {
|
||||
'id',
|
||||
],
|
||||
query: {
|
||||
match: {
|
||||
workspace_id: workspaceId,
|
||||
term: {
|
||||
workspace_id: {
|
||||
value: workspaceId,
|
||||
},
|
||||
},
|
||||
},
|
||||
fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'],
|
||||
@@ -780,16 +915,20 @@ test('should filter by workspace_id work', async t => {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
workspace_id: workspaceId,
|
||||
term: {
|
||||
workspace_id: {
|
||||
value: workspaceId,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
doc_id: docId,
|
||||
term: {
|
||||
doc_id: {
|
||||
value: docId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
|
||||
@@ -8,11 +8,12 @@ import { createModule } from '../../../__tests__/create-module';
|
||||
import { Mockers } from '../../../__tests__/mocks';
|
||||
import { ConfigModule } from '../../../base/config';
|
||||
import { ServerConfigModule } from '../../../core/config';
|
||||
import { Models } from '../../../models';
|
||||
import { SearchProviderFactory } from '../factory';
|
||||
import { IndexerModule, IndexerService } from '../index';
|
||||
import { ManticoresearchProvider } from '../providers';
|
||||
import { UpsertDoc } from '../service';
|
||||
import { SearchTable } from '../tables';
|
||||
import { blockSQL, docSQL, SearchTable } from '../tables';
|
||||
import {
|
||||
AggregateInput,
|
||||
SearchInput,
|
||||
@@ -35,6 +36,7 @@ const module = await createModule({
|
||||
const indexerService = module.get(IndexerService);
|
||||
const searchProviderFactory = module.get(SearchProviderFactory);
|
||||
const manticoresearch = module.get(ManticoresearchProvider);
|
||||
const models = module.get(Models);
|
||||
const user = await module.create(Mockers.User);
|
||||
const workspace = await module.create(Mockers.Workspace, {
|
||||
snapshot: true,
|
||||
@@ -50,7 +52,8 @@ test.after.always(async () => {
|
||||
});
|
||||
|
||||
test.before(async () => {
|
||||
await indexerService.createTables();
|
||||
await manticoresearch.recreateTable(SearchTable.block, blockSQL);
|
||||
await manticoresearch.recreateTable(SearchTable.doc, docSQL);
|
||||
});
|
||||
|
||||
test.afterEach.always(async () => {
|
||||
@@ -2311,3 +2314,29 @@ test('should search docs by keyword work', async t => {
|
||||
});
|
||||
|
||||
// #endregion
|
||||
|
||||
// Creates two workspaces flagged as indexed, runs the rebuild, and checks that
// both were enqueued for re-indexing and had their `indexed` flag reset to false.
test('should rebuild manticore indexes and requeue workspaces', async t => {
|
||||
const workspace1 = await module.create(Mockers.Workspace, {
|
||||
indexed: true,
|
||||
});
|
||||
const workspace2 = await module.create(Mockers.Workspace, {
|
||||
indexed: true,
|
||||
});
|
||||
// Number of 'indexer.indexWorkspace' jobs recorded before the rebuild; used
// below to skip calls that predate it.
const queueCount = module.queue.count('indexer.indexWorkspace');
|
||||
|
||||
await indexerService.rebuildManticoreIndexes();
|
||||
|
||||
// Collect the workspace ids from 'indexer.indexWorkspace' calls made after the
// count taken above.
const queuedWorkspaceIds = new Set(
|
||||
module.queue.add
|
||||
.getCalls()
|
||||
.filter(call => call.args[0] === 'indexer.indexWorkspace')
|
||||
.slice(queueCount)
|
||||
.map(call => call.args[1].workspaceId)
|
||||
);
|
||||
|
||||
t.true(queuedWorkspaceIds.has(workspace1.id));
|
||||
t.true(queuedWorkspaceIds.has(workspace2.id));
|
||||
|
||||
t.is((await models.workspace.get(workspace1.id))?.indexed, false);
|
||||
t.is((await models.workspace.get(workspace2.id))?.indexed, false);
|
||||
});
|
||||
|
||||
@@ -38,6 +38,17 @@ const SupportIndexedAttributes = [
|
||||
'parent_block_id',
|
||||
];
|
||||
|
||||
// Field names whose `term` queries are emitted under the `equals` key instead
// of `term` when the caller supplies no explicit `termMappingField` option.
const SupportExactTermFields = new Set([
|
||||
'workspace_id',
|
||||
'doc_id',
|
||||
'block_id',
|
||||
'flavour',
|
||||
'parent_flavour',
|
||||
'parent_block_id',
|
||||
'created_by_user_id',
|
||||
'updated_by_user_id',
|
||||
]);
|
||||
|
||||
const ConvertEmptyStringToNullValueFields = new Set([
|
||||
'ref_doc_id',
|
||||
'ref',
|
||||
@@ -55,23 +66,20 @@ export class ManticoresearchProvider extends ElasticsearchProvider {
|
||||
table: SearchTable,
|
||||
mapping: string
|
||||
): Promise<void> {
|
||||
const url = `${this.config.provider.endpoint}/cli`;
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
body: mapping,
|
||||
headers: {
|
||||
'Content-Type': 'text/plain',
|
||||
},
|
||||
});
|
||||
// manticoresearch cli response is not json, so we need to handle it manually
|
||||
const text = (await response.text()).trim();
|
||||
if (!response.ok) {
|
||||
this.logger.error(`failed to create table ${table}, response: ${text}`);
|
||||
throw new InternalServerError();
|
||||
}
|
||||
const text = await this.#executeSQL(mapping);
|
||||
this.logger.log(`created table ${table}, response: ${text}`);
|
||||
}
|
||||
|
||||
// Drops `table` if it exists by sending a `DROP TABLE IF EXISTS` statement
// through #executeSQL, then logs the response text.
async dropTable(table: SearchTable): Promise<void> {
|
||||
const text = await this.#executeSQL(`DROP TABLE IF EXISTS ${table}`);
|
||||
this.logger.log(`dropped table ${table}, response: ${text}`);
|
||||
}
|
||||
|
||||
// Drops `table` (if present) and then creates it again from the `mapping`
// statement. The two steps run sequentially; if the create step throws, the
// table stays dropped.
async recreateTable(table: SearchTable, mapping: string): Promise<void> {
|
||||
await this.dropTable(table);
|
||||
await this.createTable(table, mapping);
|
||||
}
|
||||
|
||||
override async write(
|
||||
table: SearchTable,
|
||||
documents: Record<string, unknown>[],
|
||||
@@ -252,6 +260,12 @@ export class ManticoresearchProvider extends ElasticsearchProvider {
|
||||
// 1750389254 => new Date(1750389254 * 1000)
|
||||
return new Date(value * 1000);
|
||||
}
|
||||
if (value && typeof value === 'string') {
|
||||
const timestamp = Date.parse(value);
|
||||
if (!Number.isNaN(timestamp)) {
|
||||
return new Date(timestamp);
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
@@ -302,8 +316,10 @@ export class ManticoresearchProvider extends ElasticsearchProvider {
|
||||
// workspace_id: 'workspaceId1'
|
||||
// }
|
||||
// }
|
||||
let termField = options?.termMappingField ?? 'term';
|
||||
let field = Object.keys(query.term)[0];
|
||||
let termField =
|
||||
options?.termMappingField ??
|
||||
(SupportExactTermFields.has(field) ? 'equals' : 'term');
|
||||
let value = query.term[field];
|
||||
if (typeof value === 'object' && 'value' in value) {
|
||||
if ('boost' in value) {
|
||||
@@ -432,4 +448,28 @@ export class ManticoresearchProvider extends ElasticsearchProvider {
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// POSTs `sql` as a text/plain body to `<endpoint>/cli` and returns the trimmed
// response text. On a non-ok HTTP status it logs the statement and response,
// then throws InternalServerError.
async #executeSQL(sql: string) {
|
||||
const url = `${this.config.provider.endpoint}/cli`;
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'text/plain',
|
||||
};
|
||||
// An API key takes precedence; otherwise Basic auth is used when a password
// is configured. With neither, no Authorization header is sent.
if (this.config.provider.apiKey) {
|
||||
headers.Authorization = `ApiKey ${this.config.provider.apiKey}`;
|
||||
} else if (this.config.provider.password) {
|
||||
headers.Authorization = `Basic ${Buffer.from(`${this.config.provider.username}:${this.config.provider.password}`).toString('base64')}`;
|
||||
}
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
body: sql,
|
||||
headers,
|
||||
});
|
||||
// The body is read as plain text (not parsed as JSON) and trimmed before the
// status check so it can be included in the error log.
const text = (await response.text()).trim();
|
||||
if (!response.ok) {
|
||||
this.logger.error(`failed to execute SQL "${sql}", response: ${text}`);
|
||||
throw new InternalServerError();
|
||||
}
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
AggregateQueryDSL,
|
||||
BaseQueryDSL,
|
||||
HighlightDSL,
|
||||
ManticoresearchProvider,
|
||||
OperationOptions,
|
||||
SearchNode,
|
||||
SearchProvider,
|
||||
@@ -130,6 +131,63 @@ export class IndexerService {
|
||||
}
|
||||
}
|
||||
|
||||
// Recreates every Manticore search table from its mapping statement, then marks
// each workspace as not indexed and enqueues an 'indexer.indexWorkspace' job for
// it. Returns without doing anything when no search provider is configured or
// the configured provider is not a ManticoresearchProvider.
async rebuildManticoreIndexes() {
|
||||
let searchProvider: SearchProvider | undefined;
|
||||
try {
|
||||
searchProvider = this.factory.get();
|
||||
} catch (err) {
|
||||
// Only a missing provider is tolerated; any other error propagates.
if (err instanceof SearchProviderNotFound) {
|
||||
this.logger.debug('No search provider found, skip rebuilding tables');
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
if (!(searchProvider instanceof ManticoresearchProvider)) {
|
||||
this.logger.debug(
|
||||
`Search provider ${searchProvider.type} does not need manticore rebuild`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Tables are dropped and recreated one at a time, before any workspace is
// touched.
// NOTE(review): if a later step throws, the tables are already recreated
// while some workspaces may still be flagged as indexed — confirm this is
// acceptable for the migration.
const mappings = SearchTableMappingStrings[searchProvider.type];
|
||||
for (const table of Object.keys(mappings) as SearchTable[]) {
|
||||
await searchProvider.recreateTable(table, mappings[table]);
|
||||
}
|
||||
|
||||
// Page through workspaces in batches of up to 100, using the last seen `sid`
// as the cursor.
// assumes `workspace.list` returns rows in ascending `sid` order — TODO confirm
let lastWorkspaceSid = 0;
|
||||
while (true) {
|
||||
const workspaces = await this.models.workspace.list(
|
||||
{ sid: { gt: lastWorkspaceSid } },
|
||||
{ id: true, sid: true },
|
||||
100
|
||||
);
|
||||
if (!workspaces.length) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (const workspace of workspaces) {
|
||||
// NOTE(review): the meaning of the third argument (`false`) is not visible
// here — check the signature of `models.workspace.update`.
await this.models.workspace.update(
|
||||
workspace.id,
|
||||
{ indexed: false },
|
||||
false
|
||||
);
|
||||
await this.queue.add(
|
||||
'indexer.indexWorkspace',
|
||||
{
|
||||
workspaceId: workspace.id,
|
||||
},
|
||||
{
|
||||
// The job id is derived from the workspace id.
jobId: `indexWorkspace/${workspace.id}`,
|
||||
priority: 100,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
lastWorkspaceSid = workspaces[workspaces.length - 1].sid;
|
||||
}
|
||||
}
|
||||
|
||||
async write<T extends SearchTable>(
|
||||
table: T,
|
||||
documents: UpsertTypeByTable<T>[],
|
||||
|
||||
@@ -150,6 +150,8 @@ CREATE TABLE IF NOT EXISTS block (
|
||||
updated_at timestamp
|
||||
)
|
||||
morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
|
||||
charset_table = 'non_cjk, cjk'
|
||||
charset_table = 'non_cjk, chinese'
|
||||
ngram_len = '1'
|
||||
ngram_chars = 'U+1100..U+11FF, U+3130..U+318F, U+A960..U+A97F, U+AC00..U+D7AF, U+D7B0..U+D7FF, U+3040..U+30FF, U+0E00..U+0E7F'
|
||||
index_field_lengths = '1'
|
||||
`;
|
||||
|
||||
@@ -109,6 +109,8 @@ CREATE TABLE IF NOT EXISTS doc (
|
||||
updated_at timestamp
|
||||
)
|
||||
morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr'
|
||||
charset_table = 'non_cjk, cjk'
|
||||
charset_table = 'non_cjk, chinese'
|
||||
ngram_len = '1'
|
||||
ngram_chars = 'U+1100..U+11FF, U+3130..U+318F, U+A960..U+A97F, U+AC00..U+D7AF, U+D7B0..U+D7FF, U+3040..U+30FF, U+0E00..U+0E7F'
|
||||
index_field_lengths = '1'
|
||||
`;
|
||||
|
||||
Reference in New Issue
Block a user