feat(infra): collect more data to indexer (#8528)

This commit is contained in:
EYHN
2024-10-19 20:22:26 +08:00
committed by GitHub
parent 8f92be926b
commit 01c3a3b4c0
11 changed files with 341 additions and 169 deletions

View File

@@ -40,7 +40,10 @@ export class Document<S extends Schema = any> {
}
} else {
for (const key in map) {
doc.insert(key, map[key] as string | string[]);
if (map[key] === undefined) {
continue;
}
doc.insert(key, map[key]);
}
}
return doc;

View File

@@ -1,3 +1,4 @@
import { DebugLogger } from '@affine/debug';
import {
type DBSchema,
type IDBPDatabase,
@@ -25,6 +26,8 @@ import {
} from './inverted-index';
import { Match } from './match';
const logger = new DebugLogger('indexeddb');
export interface IndexDB extends DBSchema {
kvMetadata: {
key: string;
@@ -75,14 +78,19 @@ export class DataStruct {
constructor(
readonly databaseName: string,
schema: Schema
readonly schema: Schema
) {
for (const [key, type] of Object.entries(schema)) {
if (type === 'String') {
const typeInfo = typeof type === 'string' ? { type } : type;
if (typeInfo.index === false) {
// If index is false, we don't need to create an inverted index for this field.
continue;
}
if (typeInfo.type === 'String') {
this.invertedIndex.set(key, new StringInvertedIndex(key));
} else if (type === 'Integer') {
} else if (typeInfo.type === 'Integer') {
this.invertedIndex.set(key, new IntegerInvertedIndex(key));
} else if (type === 'FullText') {
} else if (typeInfo.type === 'FullText') {
this.invertedIndex.set(key, new FullTextInvertedIndex(key));
} else if (type === 'Boolean') {
this.invertedIndex.set(key, new BooleanInvertedIndex(key));
@@ -102,17 +110,29 @@ export class DataStruct {
throw new Error('Document already exists');
}
const dataMap = new Map();
for (const [key, values] of document.fields) {
const type = this.schema[key as string];
if (!type) {
return;
}
const typeInfo = typeof type === 'string' ? { type } : type;
if (typeInfo.store !== false) {
// If store is false, the field will not be stored
dataMap.set(key, values);
}
}
const nid = await trx.objectStore('records').add({
id: document.id,
data: new Map(document.fields as Map<string, string[]>),
data: dataMap,
});
for (const [key, values] of document.fields) {
const iidx = this.invertedIndex.get(key as string);
if (!iidx) {
throw new Error(
`Inverted index '${key.toString()}' not found, document not match schema`
);
return;
}
await iidx.insert(trx, nid, values);
}
@@ -164,7 +184,7 @@ export class DataStruct {
if (query.type === 'match') {
const iidx = this.invertedIndex.get(query.field as string);
if (!iidx) {
throw new Error(`Field '${query.field as string}' not found`);
return new Match();
}
return await iidx.match(trx, query.match);
} else if (query.type === 'boolean') {
@@ -187,7 +207,7 @@ export class DataStruct {
} else if (query.type === 'exists') {
const iidx = this.invertedIndex.get(query.field as string);
if (!iidx) {
throw new Error(`Field '${query.field as string}' not found`);
return new Match();
}
return await iidx.all(trx);
}
@@ -217,31 +237,41 @@ export class DataStruct {
query: Query<any>,
options: SearchOptions<any>
): Promise<SearchResult<any, any>> {
const pagination = {
skip: options.pagination?.skip ?? 0,
limit: options.pagination?.limit ?? 100,
};
const startTime = performance.now();
try {
const pagination = {
skip: options.pagination?.skip ?? 0,
limit: options.pagination?.limit ?? 100,
};
const match = await this.query(trx, query);
const match = await this.query(trx, query);
const nids = match
.toArray()
.slice(pagination.skip, pagination.skip + pagination.limit);
const nids = match
.toArray()
.slice(pagination.skip, pagination.skip + pagination.limit);
const nodes = [];
for (const nid of nids) {
nodes.push(await this.resultNode(trx, match, nid, options));
const nodes = [];
for (const nid of nids) {
nodes.push(await this.resultNode(trx, match, nid, options));
}
return {
pagination: {
count: match.size(),
hasMore: match.size() > pagination.limit + pagination.skip,
limit: pagination.limit,
skip: pagination.skip,
},
nodes: nodes,
};
} finally {
logger.debug(
`[indexer ${this.databaseName}] search`,
performance.now() - startTime,
'ms',
query
);
}
return {
pagination: {
count: match.size(),
hasMore: match.size() > pagination.limit + pagination.skip,
limit: pagination.limit,
skip: pagination.skip,
},
nodes: nodes,
};
}
async aggregate(
@@ -250,95 +280,105 @@ export class DataStruct {
field: string,
options: AggregateOptions<any>
): Promise<AggregateResult<any, any>> {
const pagination = {
skip: options.pagination?.skip ?? 0,
limit: options.pagination?.limit ?? 100,
};
const startTime = performance.now();
try {
const pagination = {
skip: options.pagination?.skip ?? 0,
limit: options.pagination?.limit ?? 100,
};
const hitPagination = options.hits
? {
skip: options.hits.pagination?.skip ?? 0,
limit: options.hits.pagination?.limit ?? 3,
}
: {
skip: 0,
limit: 0,
};
const hitPagination = options.hits
? {
skip: options.hits.pagination?.skip ?? 0,
limit: options.hits.pagination?.limit ?? 3,
}
: {
skip: 0,
limit: 0,
};
const match = await this.query(trx, query);
const match = await this.query(trx, query);
const nids = match.toArray();
const nids = match.toArray();
const buckets: {
key: string;
nids: number[];
hits: SearchResult<any, any>['nodes'];
}[] = [];
const buckets: {
key: string;
nids: number[];
hits: SearchResult<any, any>['nodes'];
}[] = [];
for (const nid of nids) {
const values = (await trx.objectStore('records').get(nid))?.data.get(
field
);
for (const value of values ?? []) {
let bucket;
let bucketIndex = buckets.findIndex(b => b.key === value);
if (bucketIndex === -1) {
bucket = { key: value, nids: [], hits: [] };
buckets.push(bucket);
bucketIndex = buckets.length - 1;
} else {
bucket = buckets[bucketIndex];
}
for (const nid of nids) {
const values = (await trx.objectStore('records').get(nid))?.data.get(
field
);
for (const value of values ?? []) {
let bucket;
let bucketIndex = buckets.findIndex(b => b.key === value);
if (bucketIndex === -1) {
bucket = { key: value, nids: [], hits: [] };
buckets.push(bucket);
bucketIndex = buckets.length - 1;
} else {
bucket = buckets[bucketIndex];
}
if (
bucketIndex >= pagination.skip &&
bucketIndex < pagination.skip + pagination.limit
) {
bucket.nids.push(nid);
if (
bucket.nids.length - 1 >= hitPagination.skip &&
bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit
bucketIndex >= pagination.skip &&
bucketIndex < pagination.skip + pagination.limit
) {
bucket.hits.push(
await this.resultNode(trx, match, nid, options.hits ?? {})
);
bucket.nids.push(nid);
if (
bucket.nids.length - 1 >= hitPagination.skip &&
bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit
) {
bucket.hits.push(
await this.resultNode(trx, match, nid, options.hits ?? {})
);
}
}
}
}
return {
buckets: buckets
.slice(pagination.skip, pagination.skip + pagination.limit)
.map(bucket => {
const result = {
key: bucket.key,
score: match.getScore(bucket.nids[0]),
count: bucket.nids.length,
} as AggregateResult<any, any>['buckets'][number];
if (options.hits) {
(result as any).hits = {
pagination: {
count: bucket.nids.length,
hasMore:
bucket.nids.length >
hitPagination.limit + hitPagination.skip,
limit: hitPagination.limit,
skip: hitPagination.skip,
},
nodes: bucket.hits,
} as SearchResult<any, any>;
}
return result;
}),
pagination: {
count: buckets.length,
hasMore: buckets.length > pagination.limit + pagination.skip,
limit: pagination.limit,
skip: pagination.skip,
},
};
} finally {
logger.debug(
`[indexer ${this.databaseName}] aggregate`,
performance.now() - startTime,
'ms'
);
}
return {
buckets: buckets
.slice(pagination.skip, pagination.skip + pagination.limit)
.map(bucket => {
const result = {
key: bucket.key,
score: match.getScore(bucket.nids[0]),
count: bucket.nids.length,
} as AggregateResult<any, any>['buckets'][number];
if (options.hits) {
(result as any).hits = {
pagination: {
count: bucket.nids.length,
hasMore:
bucket.nids.length > hitPagination.limit + hitPagination.skip,
limit: hitPagination.limit,
skip: hitPagination.skip,
},
nodes: bucket.hits,
} as SearchResult<any, any>;
}
return result;
}),
pagination: {
count: buckets.length,
hasMore: buckets.length > pagination.limit + pagination.skip,
limit: pagination.limit,
skip: pagination.skip,
},
};
}
async getAll(

View File

@@ -21,7 +21,11 @@ export interface InvertedIndex {
}
export class StringInvertedIndex implements InvertedIndex {
constructor(readonly fieldKey: string) {}
constructor(
readonly fieldKey: string,
readonly index: boolean = true,
readonly store: boolean = true
) {}
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
const objs = await trx
@@ -69,7 +73,11 @@ export class StringInvertedIndex implements InvertedIndex {
}
export class IntegerInvertedIndex implements InvertedIndex {
constructor(readonly fieldKey: string) {}
constructor(
readonly fieldKey: string,
readonly index: boolean = true,
readonly store: boolean = true
) {}
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
const objs = await trx
@@ -118,7 +126,11 @@ export class IntegerInvertedIndex implements InvertedIndex {
}
export class BooleanInvertedIndex implements InvertedIndex {
constructor(readonly fieldKey: string) {}
constructor(
readonly fieldKey: string,
readonly index: boolean = true,
readonly store: boolean = true
) {}
// eslint-disable-next-line sonarjs/no-identical-functions
async all(trx: DataStructROTransaction): Promise<Match> {
@@ -172,7 +184,11 @@ export class BooleanInvertedIndex implements InvertedIndex {
}
export class FullTextInvertedIndex implements InvertedIndex {
constructor(readonly fieldKey: string) {}
constructor(
readonly fieldKey: string,
readonly index: boolean = true,
readonly store: boolean = true
) {}
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
const queryTokens = new GeneralTokenizer().tokenize(term);

View File

@@ -31,13 +31,15 @@ export class DataStruct {
constructor(schema: Schema) {
for (const [key, type] of Object.entries(schema)) {
if (type === 'String') {
const typeInfo = typeof type === 'string' ? { type } : type;
if (typeInfo.type === 'String') {
this.invertedIndex.set(key, new StringInvertedIndex(key));
} else if (type === 'Integer') {
} else if (typeInfo.type === 'Integer') {
this.invertedIndex.set(key, new IntegerInvertedIndex(key));
} else if (type === 'FullText') {
} else if (typeInfo.type === 'FullText') {
this.invertedIndex.set(key, new FullTextInvertedIndex(key));
} else if (type === 'Boolean') {
} else if (typeInfo.type === 'Boolean') {
this.invertedIndex.set(key, new BooleanInvertedIndex(key));
} else {
throw new Error(`Field type '${type}' not supported`);

View File

@@ -1,6 +1,24 @@
import type { FieldType } from './field-type';
export type Schema = Record<string, FieldType>;
export type Schema = Record<
string,
| FieldType
| {
type: FieldType;
/**
* If false, the field will not be indexed, and thus not searchable.
*
* default: true
*/
index?: boolean;
/**
* If false, the field will not be stored and will not be included in search results.
*
* default: true
*/
store?: boolean;
}
>;
export function defineSchema<T extends Schema>(schema: T): T {
return schema;