mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-12 04:18:54 +00:00
feat(infra): opti indexer performance (#8557)
This commit is contained in:
@@ -454,101 +454,107 @@ describe.each([
|
||||
});
|
||||
});
|
||||
|
||||
test('subscribe', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
test(
|
||||
'subscribe',
|
||||
{
|
||||
timeout: 30000,
|
||||
},
|
||||
async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
|
||||
let value = null as any;
|
||||
index
|
||||
.search$({
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello world',
|
||||
})
|
||||
.pipe(map(v => (value = v)))
|
||||
.subscribe();
|
||||
let value = null as any;
|
||||
index
|
||||
.search$({
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello world',
|
||||
})
|
||||
.pipe(map(v => (value = v)))
|
||||
.subscribe();
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 5000,
|
||||
}
|
||||
);
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
|
||||
await writeData({
|
||||
'2': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
await writeData({
|
||||
'2': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 2,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 2,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 5000,
|
||||
}
|
||||
);
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
|
||||
const writer = await index.write();
|
||||
writer.delete('1');
|
||||
await writer.commit();
|
||||
const writer = await index.write();
|
||||
writer.delete('1');
|
||||
await writer.commit();
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 5000,
|
||||
}
|
||||
);
|
||||
});
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
@@ -100,7 +100,7 @@ export class DataStruct {
|
||||
}
|
||||
}
|
||||
|
||||
async insert(trx: DataStructRWTransaction, document: Document) {
|
||||
private async insert(trx: DataStructRWTransaction, document: Document) {
|
||||
const exists = await trx
|
||||
.objectStore('records')
|
||||
.index('id')
|
||||
@@ -138,7 +138,7 @@ export class DataStruct {
|
||||
}
|
||||
}
|
||||
|
||||
async delete(trx: DataStructRWTransaction, id: string) {
|
||||
private async delete(trx: DataStructRWTransaction, id: string) {
|
||||
const nid = await trx.objectStore('records').index('id').getKey(id);
|
||||
|
||||
if (nid) {
|
||||
@@ -159,11 +159,30 @@ export class DataStruct {
|
||||
deletes: string[],
|
||||
inserts: Document[]
|
||||
) {
|
||||
for (const del of deletes) {
|
||||
await this.delete(trx, del);
|
||||
}
|
||||
for (const inst of inserts) {
|
||||
await this.insert(trx, inst);
|
||||
const startTime = performance.now();
|
||||
try {
|
||||
for (const del of deletes) {
|
||||
await this.delete(trx, del);
|
||||
}
|
||||
for (const inst of inserts) {
|
||||
await this.insert(trx, inst);
|
||||
}
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Batch Write (${this.databaseName})`,
|
||||
{
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] batchWrite`,
|
||||
endTime - startTime,
|
||||
'ms'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,18 +233,6 @@ export class DataStruct {
|
||||
throw new Error(`Query type '${query.type}' not supported`);
|
||||
}
|
||||
|
||||
private async query(
|
||||
trx: DataStructROTransaction,
|
||||
query: Query<any>
|
||||
): Promise<Match> {
|
||||
const match = await this.queryRaw(trx, query);
|
||||
const filteredMatch = match.asyncFilter(async nid => {
|
||||
const record = await trx.objectStore('records').getKey(nid);
|
||||
return record !== undefined;
|
||||
});
|
||||
return filteredMatch;
|
||||
}
|
||||
|
||||
async clear(trx: DataStructRWTransaction) {
|
||||
await trx.objectStore('records').clear();
|
||||
await trx.objectStore('invertedIndex').clear();
|
||||
@@ -244,7 +251,7 @@ export class DataStruct {
|
||||
limit: options.pagination?.limit ?? 100,
|
||||
};
|
||||
|
||||
const match = await this.query(trx, query);
|
||||
const match = await this.queryRaw(trx, query);
|
||||
|
||||
const nids = match
|
||||
.toArray()
|
||||
@@ -252,7 +259,11 @@ export class DataStruct {
|
||||
|
||||
const nodes = [];
|
||||
for (const nid of nids) {
|
||||
nodes.push(await this.resultNode(trx, match, nid, options));
|
||||
const record = await trx.objectStore('records').get(nid);
|
||||
if (!record) {
|
||||
continue;
|
||||
}
|
||||
nodes.push(this.resultNode(record, options, match, nid));
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -265,9 +276,20 @@ export class DataStruct {
|
||||
nodes: nodes,
|
||||
};
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Search (${this.databaseName})`,
|
||||
{
|
||||
detail: { query, options },
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] search`,
|
||||
performance.now() - startTime,
|
||||
endTime - startTime,
|
||||
'ms',
|
||||
query
|
||||
);
|
||||
@@ -297,7 +319,7 @@ export class DataStruct {
|
||||
limit: 0,
|
||||
};
|
||||
|
||||
const match = await this.query(trx, query);
|
||||
const match = await this.queryRaw(trx, query);
|
||||
|
||||
const nids = match.toArray();
|
||||
|
||||
@@ -308,9 +330,11 @@ export class DataStruct {
|
||||
}[] = [];
|
||||
|
||||
for (const nid of nids) {
|
||||
const values = (await trx.objectStore('records').get(nid))?.data.get(
|
||||
field
|
||||
);
|
||||
const record = await trx.objectStore('records').get(nid);
|
||||
if (!record) {
|
||||
continue;
|
||||
}
|
||||
const values = record.data.get(field);
|
||||
for (const value of values ?? []) {
|
||||
let bucket;
|
||||
let bucketIndex = buckets.findIndex(b => b.key === value);
|
||||
@@ -332,7 +356,7 @@ export class DataStruct {
|
||||
bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit
|
||||
) {
|
||||
bucket.hits.push(
|
||||
await this.resultNode(trx, match, nid, options.hits ?? {})
|
||||
this.resultNode(record, options.hits ?? {}, match, nid)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -373,9 +397,20 @@ export class DataStruct {
|
||||
},
|
||||
};
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Aggregate (${this.databaseName})`,
|
||||
{
|
||||
detail: { query, field, options },
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] aggregate`,
|
||||
performance.now() - startTime,
|
||||
endTime - startTime,
|
||||
'ms'
|
||||
);
|
||||
}
|
||||
@@ -383,12 +418,19 @@ export class DataStruct {
|
||||
|
||||
async getAll(
|
||||
trx: DataStructROTransaction,
|
||||
ids: string[]
|
||||
ids?: string[]
|
||||
): Promise<Document[]> {
|
||||
const docs = [];
|
||||
for (const id of ids) {
|
||||
const record = await trx.objectStore('records').index('id').get(id);
|
||||
if (record) {
|
||||
if (ids) {
|
||||
for (const id of ids) {
|
||||
const record = await trx.objectStore('records').index('id').get(id);
|
||||
if (record) {
|
||||
docs.push(Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const records = await trx.objectStore('records').getAll();
|
||||
for (const record of records) {
|
||||
docs.push(Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
@@ -405,7 +447,10 @@ export class DataStruct {
|
||||
await this.ensureInitialized();
|
||||
return this.database.transaction(
|
||||
['records', 'invertedIndex', 'kvMetadata'],
|
||||
'readonly'
|
||||
'readonly',
|
||||
{
|
||||
durability: 'relaxed',
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -413,7 +458,10 @@ export class DataStruct {
|
||||
await this.ensureInitialized();
|
||||
return this.database.transaction(
|
||||
['records', 'invertedIndex', 'kvMetadata'],
|
||||
'readwrite'
|
||||
'readwrite',
|
||||
{
|
||||
durability: 'relaxed',
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -446,20 +494,15 @@ export class DataStruct {
|
||||
});
|
||||
}
|
||||
|
||||
private async resultNode(
|
||||
trx: DataStructROTransaction,
|
||||
match: Match,
|
||||
nid: number,
|
||||
options: SearchOptions<any>
|
||||
): Promise<SearchResult<any, any>['nodes'][number]> {
|
||||
const record = await trx.objectStore('records').get(nid);
|
||||
if (!record) {
|
||||
throw new Error(`Record not found for nid ${nid}`);
|
||||
}
|
||||
|
||||
private resultNode(
|
||||
record: { id: string; data: Map<string, string[]> },
|
||||
options: SearchOptions<any>,
|
||||
match?: Match,
|
||||
nid?: number
|
||||
): SearchResult<any, any>['nodes'][number] {
|
||||
const node = {
|
||||
id: record.id,
|
||||
score: match.getScore(nid),
|
||||
score: match && nid ? match.getScore(nid) : 1,
|
||||
} as any;
|
||||
|
||||
if (options.fields) {
|
||||
@@ -473,7 +516,7 @@ export class DataStruct {
|
||||
node.fields = fields;
|
||||
}
|
||||
|
||||
if (options.highlights) {
|
||||
if (match && nid && options.highlights) {
|
||||
const highlights = {} as Record<string, string[]>;
|
||||
for (const { field, before, end } of options.highlights) {
|
||||
const highlightValues = match.getHighlighters(nid, field);
|
||||
|
||||
@@ -61,7 +61,7 @@ export class IndexedDBIndex<S extends Schema> implements Index<S> {
|
||||
options: SearchOptions<any> = {}
|
||||
): Observable<SearchResult<any, SearchOptions<any>>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(500, undefined, { leading: true, trailing: true }),
|
||||
throttleTime(3000, undefined, { leading: true, trailing: true }),
|
||||
exhaustMapWithTrailing(() => {
|
||||
return from(
|
||||
(async () => {
|
||||
@@ -88,7 +88,7 @@ export class IndexedDBIndex<S extends Schema> implements Index<S> {
|
||||
options: AggregateOptions<any> = {}
|
||||
): Observable<AggregateResult<S, AggregateOptions<any>>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(500, undefined, { leading: true, trailing: true }),
|
||||
throttleTime(3000, undefined, { leading: true, trailing: true }),
|
||||
exhaustMapWithTrailing(() => {
|
||||
return from(
|
||||
(async () => {
|
||||
@@ -120,7 +120,7 @@ export class IndexedDBIndexWriter<S extends Schema> implements IndexWriter<S> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
async getAll(ids: string[]): Promise<Document<S>[]> {
|
||||
async getAll(ids?: string[]): Promise<Document<S>[]> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.getAll(trx, ids);
|
||||
}
|
||||
@@ -138,6 +138,7 @@ export class IndexedDBIndexWriter<S extends Schema> implements IndexWriter<S> {
|
||||
|
||||
async commit(): Promise<void> {
|
||||
await this.data.batchWrite(this.trx, this.deletes, this.inserts);
|
||||
this.trx.commit();
|
||||
this.channel.postMessage(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -202,6 +202,12 @@ export class FullTextInvertedIndex implements InvertedIndex {
|
||||
}
|
||||
>
|
||||
>();
|
||||
const avgFieldLength =
|
||||
(
|
||||
await trx
|
||||
.objectStore('kvMetadata')
|
||||
.get(`full-text:avg-field-length:${this.fieldKey}`)
|
||||
)?.value ?? 0;
|
||||
for (const token of queryTokens) {
|
||||
const key = InvertedIndexKey.forString(this.fieldKey, token.term);
|
||||
const objs = await trx
|
||||
@@ -229,12 +235,6 @@ export class FullTextInvertedIndex implements InvertedIndex {
|
||||
};
|
||||
const termFreq = position.rs.length;
|
||||
const totalCount = objs.length;
|
||||
const avgFieldLength =
|
||||
(
|
||||
await trx
|
||||
.objectStore('kvMetadata')
|
||||
.get(`full-text:avg-field-length:${this.fieldKey}`)
|
||||
)?.value ?? 0;
|
||||
const fieldLength = position.l;
|
||||
const score =
|
||||
bm25(termFreq, 1, totalCount, fieldLength, avgFieldLength) *
|
||||
|
||||
@@ -95,28 +95,6 @@ export class Match {
|
||||
.map(e => e[0]);
|
||||
}
|
||||
|
||||
filter(predicate: (id: number) => boolean) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (predicate(id)) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
async asyncFilter(predicate: (id: number) => Promise<boolean>) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (await predicate(id)) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
private copyExtData(from: Match, id: number) {
|
||||
for (const [field, values] of from.highlighters.get(id) ?? []) {
|
||||
for (const [index, ranges] of values) {
|
||||
|
||||
@@ -47,16 +47,22 @@ export class DataStruct {
|
||||
}
|
||||
}
|
||||
|
||||
getAll(ids: string[]): Document[] {
|
||||
return ids
|
||||
.map(id => {
|
||||
const nid = this.idMap.get(id);
|
||||
if (nid === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
return Document.from(id, this.records[nid].data);
|
||||
})
|
||||
.filter((v): v is Document => v !== undefined);
|
||||
getAll(ids?: string[]): Document[] {
|
||||
if (ids) {
|
||||
return ids
|
||||
.map(id => {
|
||||
const nid = this.idMap.get(id);
|
||||
if (nid === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
return Document.from(id, this.records[nid].data);
|
||||
})
|
||||
.filter((v): v is Document => v !== undefined);
|
||||
} else {
|
||||
return this.records
|
||||
.filter(record => !record.deleted)
|
||||
.map(record => Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
|
||||
insert(document: Document) {
|
||||
|
||||
@@ -28,7 +28,7 @@ export class MemoryIndex<S extends Schema> implements Index<S> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
getAll(ids: string[]): Promise<Document<S>[]> {
|
||||
getAll(ids?: string[]): Promise<Document<S>[]> {
|
||||
return Promise.resolve(this.data.getAll(ids));
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ export interface IndexWriter<S extends Schema>
|
||||
export interface IndexReader<S extends Schema> {
|
||||
get(id: string): Promise<Document<S> | null>;
|
||||
|
||||
getAll(ids: string[]): Promise<Document<S>[]>;
|
||||
getAll(ids?: string[]): Promise<Document<S>[]>;
|
||||
|
||||
has(id: string): Promise<boolean>;
|
||||
}
|
||||
|
||||
@@ -50,7 +50,9 @@ export class IndexedDBJobQueue<J> implements JobQueue<J> {
|
||||
async accept(): Promise<Job[] | null> {
|
||||
await this.ensureInitialized();
|
||||
const jobs = [];
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite');
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
// if no priority jobs
|
||||
|
||||
@@ -148,7 +150,9 @@ export class IndexedDBJobQueue<J> implements JobQueue<J> {
|
||||
|
||||
async complete(jobs: Job[]): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite');
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
for (const { id } of jobs) {
|
||||
await trx
|
||||
@@ -162,7 +166,9 @@ export class IndexedDBJobQueue<J> implements JobQueue<J> {
|
||||
|
||||
async return(jobs: Job[], retry: boolean = false): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite');
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
for (const { id } of jobs) {
|
||||
if (retry) {
|
||||
@@ -185,7 +191,9 @@ export class IndexedDBJobQueue<J> implements JobQueue<J> {
|
||||
|
||||
async clear(): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite');
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
await trx.objectStore('jobs').clear();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user