feat(nbstore): add indexer storage (#10953)
@@ -5,5 +5,4 @@ export * from './livedata';
export * from './media';
export * from './orm';
export * from './storage';
export * from './sync';
export * from './utils';

@@ -1,8 +0,0 @@
export * from './indexer';
export {
  IndexedDBIndex,
  IndexedDBIndexStorage,
} from './indexer/impl/indexeddb';
export { MemoryIndex, MemoryIndexStorage } from './indexer/impl/memory';
export * from './job';
export { IndexedDBJobQueue } from './job/impl/indexeddb';
@@ -1,147 +0,0 @@
# index

Search engine abstraction layer for AFFiNE.

## Using

1. Define schema

First, we need to define the shape of the data. The following data types are currently supported:

- 'Integer'
- 'Boolean'
- 'FullText': for full-text search; values are tokenized and stemmed.
- 'String': for exact-match search, e.g. tags and ids.

```typescript
const schema = defineSchema({
  title: 'FullText',
  tag: 'String',
  size: 'Integer',
});
```

> **Array type**
> All types can contain one or more values, so each field can store an array.
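The IndexedDB implementation added in this change also understands an object form for a field, with optional `index` and `store` flags: a field with `index: false` gets no inverted index, and `store: false` keeps the value out of the stored record. A hedged sketch, assuming `defineSchema` passes the object form through unchanged:

```typescript
const schemaWithOptions = defineSchema({
  title: 'FullText',
  tag: 'String',
  // Object form (assumed accepted by defineSchema): indexed but not stored.
  rawPayload: { type: 'String', store: false },
  // Stored but not searchable.
  internalFlag: { type: 'Boolean', index: false },
});
```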
2. Pick a backend

Two backends are currently available; a short construction sketch follows the note below.

- `MemoryIndex`: in-memory indexer, useful for testing.
- `IndexedDBIndex`: persistent indexer using IndexedDB.

> **Underlying Data Table**
> Some backends need to maintain underlying data tables, including creating and migrating them. This happens silently the first time the indexer is invoked;
> callers do not need to worry about these details.
>
> This design follows the usual conventions of search engine APIs, such as Elasticsearch: https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html
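As referenced above, a minimal construction sketch based on the constructors and the `IndexedDBIndexStorage` added in this change (the database and index names are placeholders):

```typescript
// In-memory backend — handy for tests.
const memoryIndex = new MemoryIndex(schema);

// Persistent backend — the second argument is the database name and
// defaults to 'indexer'.
const idbIndex = new IndexedDBIndex(schema, 'my-database');

// A storage can hand out named indexes sharing one database-name prefix.
const storage = new IndexedDBIndexStorage('my-database');
const docIndex = storage.getIndex('docs', schema);
```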
3. Write data

To write data to the indexer, first start a write transaction with `await index.write()`, then complete the batch write with `await writer.commit()`.

> **Transactional**
> Typically, the indexer does not provide transactional guarantees; reliable locking logic needs to be implemented at a higher level.

```typescript
const index = new IndexedDBIndex(schema);

const writer = await index.write();
writer.insert(
  Document.from('id', {
    title: 'hello world',
    tag: ['doc', 'page'],
    size: '100',
  })
);
await writer.commit();
```
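The writer returned by `index.write()` in this change also supports `delete(id)` and `put(document)` (a delete followed by an insert), batched into the same commit:

```typescript
const writer = await index.write();
// Remove a document by id.
writer.delete('outdated-id');
// put() replaces an existing document: it queues a delete, then an insert.
writer.put(
  Document.from('id', {
    title: 'hello world (edited)',
    tag: ['doc'],
    size: '120',
  })
);
await writer.commit();
```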
4. Search data

To search for content in the indexer, you need to use a specific **query language**. Here are some examples:

```typescript
// match title == 'hello world'
{
  type: 'match',
  field: 'title',
  match: 'hello world',
}

// match title == 'hello world' && tag == 'doc'
{
  type: 'boolean',
  occur: 'must',
  queries: [
    {
      type: 'match',
      field: 'title',
      match: 'hello world',
    },
    {
      type: 'match',
      field: 'tag',
      match: 'doc',
    },
  ],
}
```
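Beyond `match` and `boolean` with `occur: 'must'`, the query executor added in this change also handles `all`, `exists`, `boost`, and the `should` / `must_not` variants of `boolean`:

```typescript
// match every document
{
  type: 'all',
}

// match documents that have a 'title' field
{
  type: 'exists',
  field: 'title',
}

// scale the score of a sub-query
{
  type: 'boost',
  boost: 2,
  query: { type: 'match', field: 'tag', match: 'doc' },
}
```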
There are two ways to perform a search: `index.search()` and `index.aggregate()`.

- **search**: returns each matched node along with pagination information.
- **aggregate**: groups all matched results into buckets by a given field and returns the count and score of the items in each bucket.

Examples:

```typescript
const result = await index.search({
  type: 'match',
  field: 'title',
  match: 'hello world',
});
// result = {
//   nodes: [
//     {
//       id: '1',
//       score: 1,
//     },
//   ],
//   pagination: {
//     count: 1,
//     hasMore: false,
//     limit: 10,
//     skip: 0,
//   },
// }
```

```typescript
const result = await index.aggregate(
  {
    type: 'match',
    field: 'title',
    match: 'affine',
  },
  'tag'
);
// result = {
//   buckets: [
//     { key: 'motorcycle', count: 2, score: 1 },
//     { key: 'bike', count: 1, score: 1 },
//     { key: 'airplane', count: 1, score: 1 },
//   ],
//   pagination: {
//     count: 3,
//     hasMore: false,
//     limit: 10,
//     skip: 0,
//   },
// }
```
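Both calls also accept an options object (`pagination`, `fields`, `highlights`, and a `hits` block for `aggregate`), and have reactive counterparts `search$()` / `aggregate$()` that re-emit whenever the index changes. A sketch based on the tests in this change:

```typescript
const subscription = index
  .search$(
    { type: 'match', field: 'title', match: 'hello' },
    {
      pagination: { skip: 0, limit: 10 },
      fields: ['title', 'tag'],
      highlights: [{ field: 'title', before: '<b>', end: '</b>' }],
    }
  )
  .subscribe(result => {
    // Re-emitted after writes that affect the index.
    console.log(result.nodes, result.pagination);
  });

// Stop listening when no longer needed.
subscription.unsubscribe();
```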

For more usage examples, see [black-box.spec.ts](./__tests__/black-box.spec.ts).
@@ -1,560 +0,0 @@
|
||||
/**
|
||||
* @vitest-environment happy-dom
|
||||
*/
|
||||
import 'fake-indexeddb/auto';
|
||||
|
||||
import { map } from 'rxjs';
|
||||
import { beforeEach, describe, expect, test, vitest } from 'vitest';
|
||||
|
||||
import { defineSchema, Document, type Index } from '..';
|
||||
import { IndexedDBIndex } from '../impl/indexeddb';
|
||||
import { MemoryIndex } from '../impl/memory';
|
||||
|
||||
const schema = defineSchema({
|
||||
title: 'FullText',
|
||||
tag: 'String',
|
||||
size: 'Integer',
|
||||
});
|
||||
|
||||
let index: Index<typeof schema> = null!;
|
||||
|
||||
describe.each([
|
||||
{ name: 'memory', backend: MemoryIndex },
|
||||
{ name: 'idb', backend: IndexedDBIndex },
|
||||
])('index tests($name)', ({ backend }) => {
|
||||
async function writeData(
|
||||
data: Record<
|
||||
string,
|
||||
Partial<Record<keyof typeof schema, string | string[]>>
|
||||
>
|
||||
) {
|
||||
const writer = await index.write();
|
||||
for (const [id, item] of Object.entries(data)) {
|
||||
const doc = new Document(id);
|
||||
for (const [key, value] of Object.entries(item)) {
|
||||
if (Array.isArray(value)) {
|
||||
for (const v of value) {
|
||||
doc.insert(key, v);
|
||||
}
|
||||
} else {
|
||||
doc.insert(key, value);
|
||||
}
|
||||
}
|
||||
writer.insert(doc);
|
||||
}
|
||||
await writer.commit();
|
||||
}
|
||||
|
||||
beforeEach(async () => {
|
||||
index = new backend(schema);
|
||||
await index.clear();
|
||||
});
|
||||
|
||||
test('basic', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.search({
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello world',
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('basic integer', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
size: '100',
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.search({
|
||||
type: 'match',
|
||||
field: 'size',
|
||||
match: '100',
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('fuzz', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
const result = await index.search({
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hell',
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('highlight', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
size: '100',
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.search(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello',
|
||||
},
|
||||
{
|
||||
highlights: [
|
||||
{
|
||||
field: 'title',
|
||||
before: '<b>',
|
||||
end: '</b>',
|
||||
},
|
||||
],
|
||||
}
|
||||
);
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: expect.arrayContaining([
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
highlights: {
|
||||
title: [expect.stringContaining('<b>hello</b>')],
|
||||
},
|
||||
},
|
||||
]),
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('fields', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
tag: ['car', 'bike'],
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.search(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello',
|
||||
},
|
||||
{
|
||||
fields: ['title', 'tag'],
|
||||
}
|
||||
);
|
||||
|
||||
expect(result.nodes[0].fields).toEqual({
|
||||
title: 'hello world',
|
||||
tag: expect.arrayContaining(['bike', 'car']),
|
||||
});
|
||||
});
|
||||
|
||||
test('pagination', async () => {
|
||||
await writeData(
|
||||
Array.from({ length: 100 }).reduce((acc: any, _, i) => {
|
||||
acc['apple' + i] = {
|
||||
tag: ['apple'],
|
||||
};
|
||||
return acc;
|
||||
}, {}) as any
|
||||
);
|
||||
|
||||
const result = await index.search(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'tag',
|
||||
match: 'apple',
|
||||
},
|
||||
{
|
||||
pagination: {
|
||||
skip: 0,
|
||||
limit: 10,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: expect.arrayContaining(
|
||||
Array.from({ length: 10 }).fill({
|
||||
id: expect.stringContaining('apple'),
|
||||
score: expect.anything(),
|
||||
})
|
||||
),
|
||||
pagination: {
|
||||
count: 100,
|
||||
hasMore: true,
|
||||
limit: 10,
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
|
||||
const result2 = await index.search(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'tag',
|
||||
match: 'apple',
|
||||
},
|
||||
{
|
||||
pagination: {
|
||||
skip: 10,
|
||||
limit: 10,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
expect(result2).toEqual({
|
||||
nodes: expect.arrayContaining(
|
||||
Array.from({ length: 10 }).fill({
|
||||
id: expect.stringContaining('apple'),
|
||||
score: expect.anything(),
|
||||
})
|
||||
),
|
||||
pagination: {
|
||||
count: 100,
|
||||
hasMore: true,
|
||||
limit: 10,
|
||||
skip: 10,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('aggr', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
tag: ['car', 'bike'],
|
||||
},
|
||||
affine1: {
|
||||
title: 'affine',
|
||||
tag: ['motorcycle', 'bike'],
|
||||
},
|
||||
affine2: {
|
||||
title: 'affine',
|
||||
tag: ['motorcycle', 'airplane'],
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.aggregate(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'affine',
|
||||
},
|
||||
'tag'
|
||||
);
|
||||
|
||||
expect(result).toEqual({
|
||||
buckets: expect.arrayContaining([
|
||||
{ key: 'motorcycle', count: 2, score: expect.anything() },
|
||||
{ key: 'bike', count: 1, score: expect.anything() },
|
||||
{ key: 'airplane', count: 1, score: expect.anything() },
|
||||
]),
|
||||
pagination: {
|
||||
count: 3,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('hits', async () => {
|
||||
await writeData(
|
||||
Array.from({ length: 100 }).reduce((acc: any, _, i) => {
|
||||
acc['apple' + i] = {
|
||||
title: 'apple',
|
||||
tag: ['apple', 'fruit'],
|
||||
};
|
||||
return acc;
|
||||
}, {}) as any
|
||||
);
|
||||
const result = await index.aggregate(
|
||||
{
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'apple',
|
||||
},
|
||||
'tag',
|
||||
{
|
||||
hits: {
|
||||
pagination: {
|
||||
skip: 0,
|
||||
limit: 5,
|
||||
},
|
||||
highlights: [
|
||||
{
|
||||
field: 'title',
|
||||
before: '<b>',
|
||||
end: '</b>',
|
||||
},
|
||||
],
|
||||
fields: ['title', 'tag'],
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
expect(result).toEqual({
|
||||
buckets: expect.arrayContaining([
|
||||
{
|
||||
key: 'apple',
|
||||
count: 100,
|
||||
score: expect.anything(),
|
||||
hits: {
|
||||
pagination: {
|
||||
count: 100,
|
||||
hasMore: true,
|
||||
limit: 5,
|
||||
skip: 0,
|
||||
},
|
||||
nodes: expect.arrayContaining(
|
||||
Array.from({ length: 5 }).fill({
|
||||
id: expect.stringContaining('apple'),
|
||||
score: expect.anything(),
|
||||
highlights: {
|
||||
title: [expect.stringContaining('<b>apple</b>')],
|
||||
},
|
||||
fields: {
|
||||
title: expect.stringContaining('apple'),
|
||||
tag: expect.arrayContaining(['apple', 'fruit']),
|
||||
},
|
||||
})
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
key: 'fruit',
|
||||
count: 100,
|
||||
score: expect.anything(),
|
||||
hits: {
|
||||
pagination: {
|
||||
count: 100,
|
||||
hasMore: true,
|
||||
limit: 5,
|
||||
skip: 0,
|
||||
},
|
||||
nodes: expect.arrayContaining(
|
||||
Array.from({ length: 5 }).fill({
|
||||
id: expect.stringContaining('apple'),
|
||||
score: expect.anything(),
|
||||
highlights: {
|
||||
title: [expect.stringContaining('<b>apple</b>')],
|
||||
},
|
||||
fields: {
|
||||
title: expect.stringContaining('apple'),
|
||||
tag: expect.arrayContaining(['apple', 'fruit']),
|
||||
},
|
||||
})
|
||||
),
|
||||
},
|
||||
},
|
||||
]),
|
||||
pagination: {
|
||||
count: 2,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('exists', async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
tag: '111',
|
||||
},
|
||||
'2': {
|
||||
tag: '222',
|
||||
},
|
||||
'3': {
|
||||
title: 'hello world',
|
||||
tag: '333',
|
||||
},
|
||||
});
|
||||
|
||||
const result = await index.search({
|
||||
type: 'exists',
|
||||
field: 'title',
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
nodes: expect.arrayContaining([
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
{
|
||||
id: '3',
|
||||
score: expect.anything(),
|
||||
},
|
||||
]),
|
||||
pagination: {
|
||||
count: 2,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test(
|
||||
'subscribe',
|
||||
{
|
||||
timeout: 30000,
|
||||
},
|
||||
async () => {
|
||||
await writeData({
|
||||
'1': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
|
||||
let value = null as any;
|
||||
index
|
||||
.search$({
|
||||
type: 'match',
|
||||
field: 'title',
|
||||
match: 'hello world',
|
||||
})
|
||||
.pipe(map(v => (value = v)))
|
||||
.subscribe();
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
|
||||
await writeData({
|
||||
'2': {
|
||||
title: 'hello world',
|
||||
},
|
||||
});
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '1',
|
||||
score: expect.anything(),
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 2,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
|
||||
const writer = await index.write();
|
||||
writer.delete('1');
|
||||
await writer.commit();
|
||||
|
||||
await vitest.waitFor(
|
||||
() => {
|
||||
expect(value).toEqual({
|
||||
nodes: [
|
||||
{
|
||||
id: '2',
|
||||
score: expect.anything(),
|
||||
},
|
||||
],
|
||||
pagination: {
|
||||
count: 1,
|
||||
hasMore: false,
|
||||
limit: expect.anything(),
|
||||
skip: 0,
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
timeout: 10000,
|
||||
}
|
||||
);
|
||||
}
|
||||
);
|
||||
});
|
||||
@@ -1,51 +0,0 @@
|
||||
import type { Schema } from './schema';
|
||||
|
||||
export class Document<S extends Schema = any> {
|
||||
constructor(public readonly id: string) {}
|
||||
|
||||
fields = new Map<keyof S, string[]>();
|
||||
|
||||
public insert<F extends keyof S>(field: F, value: string | string[]) {
|
||||
const values = this.fields.get(field) ?? [];
|
||||
if (Array.isArray(value)) {
|
||||
values.push(...value);
|
||||
} else {
|
||||
values.push(value);
|
||||
}
|
||||
this.fields.set(field, values);
|
||||
}
|
||||
|
||||
get<F extends keyof S>(field: F): string[] | string | undefined {
|
||||
const values = this.fields.get(field);
|
||||
if (values === undefined) {
|
||||
return undefined;
|
||||
} else if (values.length === 1) {
|
||||
return values[0];
|
||||
} else {
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
||||
static from<S extends Schema>(
|
||||
id: string,
|
||||
map:
|
||||
| Partial<Record<keyof S, string | string[]>>
|
||||
| Map<keyof S, string | string[]>
|
||||
): Document<S> {
|
||||
const doc = new Document(id);
|
||||
|
||||
if (map instanceof Map) {
|
||||
for (const [key, value] of map) {
|
||||
doc.insert(key, value);
|
||||
}
|
||||
} else {
|
||||
for (const key in map) {
|
||||
if (map[key] === undefined || map[key] === null) {
|
||||
continue;
|
||||
}
|
||||
doc.insert(key, map[key]);
|
||||
}
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
export type FieldType = 'Integer' | 'FullText' | 'String' | 'Boolean';
|
||||
@@ -1,10 +0,0 @@
|
||||
import { expect, test } from 'vitest';
|
||||
|
||||
import { bm25 } from '../bm25';
|
||||
|
||||
test('bm25', () => {
|
||||
expect(bm25(1, 1, 10, 10, 15)).toEqual(3.2792079793859643);
|
||||
expect(bm25(2, 1, 10, 10, 15) > bm25(1, 1, 10, 10, 15)).toBeTruthy();
|
||||
expect(bm25(1, 1, 10, 10, 15) > bm25(2, 1, 10, 100, 15)).toBeTruthy();
|
||||
expect(bm25(1, 1, 10, 10, 15) > bm25(1, 1, 10, 100, 15)).toBeTruthy();
|
||||
});
|
||||
@@ -1,32 +0,0 @@
|
||||
import { expect, test } from 'vitest';
|
||||
|
||||
import { highlighter } from '../highlighter';
|
||||
|
||||
test('highlighter', () => {
|
||||
expect(highlighter('0123456789', '<b>', '</b>', [[3, 5]])).toEqual(
|
||||
'012<b>34</b>56789'
|
||||
);
|
||||
|
||||
expect(
|
||||
highlighter(
|
||||
'012345678901234567890123456789012345678901234567890123456789',
|
||||
'<b>',
|
||||
'</b>',
|
||||
[[59, 60]]
|
||||
)
|
||||
).toEqual('...0123456789012345678901234567890123456789012345678<b>9</b>');
|
||||
|
||||
expect(
|
||||
highlighter(
|
||||
'012345678901234567890123456789012345678901234567890123456789',
|
||||
'<b>',
|
||||
'</b>',
|
||||
[
|
||||
[10, 11],
|
||||
[49, 51],
|
||||
]
|
||||
)
|
||||
).toEqual(
|
||||
'0123456789<b>0</b>12345678901234567890123456789012345678<b>9</b>...'
|
||||
);
|
||||
});
|
||||
@@ -1,128 +0,0 @@
|
||||
import { expect, test } from 'vitest';
|
||||
|
||||
import { GeneralTokenizer } from '../tokenizer';
|
||||
|
||||
test('tokenizer', () => {
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('hello world,\n AFFiNE');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{ term: 'hello', start: 0, end: 5 },
|
||||
{ term: 'world', start: 7, end: 12 },
|
||||
{ term: 'affine', start: 15, end: 21 },
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('你好世界,阿芬');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{
|
||||
end: 2,
|
||||
start: 0,
|
||||
term: '你好',
|
||||
},
|
||||
{
|
||||
end: 3,
|
||||
start: 1,
|
||||
term: '好世',
|
||||
},
|
||||
{
|
||||
end: 4,
|
||||
start: 2,
|
||||
term: '世界',
|
||||
},
|
||||
{
|
||||
end: 7,
|
||||
start: 5,
|
||||
term: '阿芬',
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('1阿2芬');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{ term: '1', start: 0, end: 1 },
|
||||
{ term: '阿', start: 1, end: 2 },
|
||||
{ term: '2', start: 2, end: 3 },
|
||||
{ term: '芬', start: 3, end: 4 },
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('안녕하세요 세계');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{
|
||||
end: 2,
|
||||
start: 0,
|
||||
term: '안녕',
|
||||
},
|
||||
{
|
||||
end: 3,
|
||||
start: 1,
|
||||
term: '녕하',
|
||||
},
|
||||
{
|
||||
end: 4,
|
||||
start: 2,
|
||||
term: '하세',
|
||||
},
|
||||
{
|
||||
end: 5,
|
||||
start: 3,
|
||||
term: '세요',
|
||||
},
|
||||
{
|
||||
end: 8,
|
||||
start: 6,
|
||||
term: '세계',
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('ハローワールド');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{ term: 'ハロ', start: 0, end: 2 },
|
||||
{ term: 'ロー', start: 1, end: 3 },
|
||||
{ term: 'ーワ', start: 2, end: 4 },
|
||||
{ term: 'ワー', start: 3, end: 5 },
|
||||
{ term: 'ール', start: 4, end: 6 },
|
||||
{ term: 'ルド', start: 5, end: 7 },
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('はろーわーるど');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{ term: 'はろ', start: 0, end: 2 },
|
||||
{ term: 'ろー', start: 1, end: 3 },
|
||||
{ term: 'ーわ', start: 2, end: 4 },
|
||||
{ term: 'わー', start: 3, end: 5 },
|
||||
{ term: 'ーる', start: 4, end: 6 },
|
||||
{ term: 'るど', start: 5, end: 7 },
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('👋1️⃣🚪👋🏿');
|
||||
|
||||
expect(tokens).toEqual([
|
||||
{ term: '👋', start: 0, end: 2 },
|
||||
{ term: '1️⃣', start: 2, end: 5 },
|
||||
{ term: '🚪', start: 5, end: 7 },
|
||||
{ term: '👋🏿', start: 7, end: 11 },
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const tokens = new GeneralTokenizer().tokenize('1️');
|
||||
|
||||
expect(tokens).toEqual([{ term: '1️', start: 0, end: 2 }]);
|
||||
}
|
||||
});
|
||||
@@ -1,62 +0,0 @@
|
||||
/**
|
||||
* Parameters of the BM25+ scoring algorithm. Customizing these is almost never
|
||||
* necessary, and finetuning them requires an understanding of the BM25 scoring
|
||||
* model.
|
||||
*
|
||||
* Some information about BM25 (and BM25+) can be found at these links:
|
||||
*
|
||||
* - https://en.wikipedia.org/wiki/Okapi_BM25
|
||||
* - https://opensourceconnections.com/blog/2015/10/16/bm25-the-next-generation-of-lucene-relevation/
|
||||
*/
|
||||
export type BM25Params = {
|
||||
/** Term frequency saturation point.
|
||||
*
|
||||
* Recommended values are between `1.2` and `2`. Higher values increase the
|
||||
* difference in score between documents with higher and lower term
|
||||
* frequencies. Setting this to `0` or a negative value is invalid. Defaults
|
||||
* to `1.2`
|
||||
*/
|
||||
k: number;
|
||||
|
||||
/**
|
||||
* Length normalization impact.
|
||||
*
|
||||
* Recommended values are around `0.75`. Higher values increase the weight
|
||||
* that field length has on scoring. Setting this to `0` (not recommended)
|
||||
* means that the field length has no effect on scoring. Negative values are
|
||||
* invalid. Defaults to `0.7`.
|
||||
*/
|
||||
b: number;
|
||||
|
||||
/**
|
||||
* BM25+ frequency normalization lower bound (usually called δ).
|
||||
*
|
||||
* Recommended values are between `0.5` and `1`. Increasing this parameter
|
||||
* increases the minimum relevance of one occurrence of a search term
|
||||
* regardless of its (possibly very long) field length. Negative values are
|
||||
* invalid. Defaults to `0.5`.
|
||||
*/
|
||||
d: number;
|
||||
};
|
||||
|
||||
const defaultBM25params: BM25Params = { k: 1.2, b: 0.7, d: 0.5 };
|
||||
|
||||
export const bm25 = (
|
||||
termFreq: number,
|
||||
matchingCount: number,
|
||||
totalCount: number,
|
||||
fieldLength: number,
|
||||
avgFieldLength: number,
|
||||
bm25params: BM25Params = defaultBM25params
|
||||
): number => {
|
||||
const { k, b, d } = bm25params;
|
||||
const invDocFreq = Math.log(
|
||||
1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5)
|
||||
);
|
||||
return (
|
||||
invDocFreq *
|
||||
(d +
|
||||
(termFreq * (k + 1)) /
|
||||
(termFreq + k * (1 - b + b * (fieldLength / avgFieldLength))))
|
||||
);
|
||||
};
|
||||
@@ -1,551 +0,0 @@
|
||||
import { DebugLogger } from '@affine/debug';
|
||||
import {
|
||||
type DBSchema,
|
||||
type IDBPDatabase,
|
||||
type IDBPTransaction,
|
||||
openDB,
|
||||
type StoreNames,
|
||||
} from 'idb';
|
||||
|
||||
import {
|
||||
type AggregateOptions,
|
||||
type AggregateResult,
|
||||
Document,
|
||||
type Query,
|
||||
type Schema,
|
||||
type SearchOptions,
|
||||
type SearchResult,
|
||||
} from '../../';
|
||||
import { highlighter } from './highlighter';
|
||||
import {
|
||||
BooleanInvertedIndex,
|
||||
FullTextInvertedIndex,
|
||||
IntegerInvertedIndex,
|
||||
type InvertedIndex,
|
||||
StringInvertedIndex,
|
||||
} from './inverted-index';
|
||||
import { Match } from './match';
|
||||
|
||||
const logger = new DebugLogger('indexeddb');
|
||||
|
||||
export interface IndexDB extends DBSchema {
|
||||
kvMetadata: {
|
||||
key: string;
|
||||
value: {
|
||||
key: string;
|
||||
value: any;
|
||||
};
|
||||
};
|
||||
records: {
|
||||
key: number;
|
||||
value: {
|
||||
id: string;
|
||||
data: Map<string, string[]>;
|
||||
};
|
||||
indexes: { id: string };
|
||||
};
|
||||
invertedIndex: {
|
||||
key: number;
|
||||
value: {
|
||||
nid: number;
|
||||
pos?: {
|
||||
i: number /* index */;
|
||||
l: number /* length */;
|
||||
rs: [number, number][] /* ranges: [start, end] */;
|
||||
};
|
||||
key: ArrayBuffer;
|
||||
};
|
||||
indexes: { key: ArrayBuffer; nid: number };
|
||||
};
|
||||
}
|
||||
|
||||
export type DataStructRWTransaction = IDBPTransaction<
|
||||
IndexDB,
|
||||
ArrayLike<StoreNames<IndexDB>>,
|
||||
'readwrite'
|
||||
>;
|
||||
|
||||
export type DataStructROTransaction = IDBPTransaction<
|
||||
IndexDB,
|
||||
ArrayLike<StoreNames<IndexDB>>,
|
||||
'readonly' | 'readwrite'
|
||||
>;
|
||||
|
||||
export class DataStruct {
|
||||
private initializePromise: Promise<void> | null = null;
|
||||
database: IDBPDatabase<IndexDB> = null as any;
|
||||
invertedIndex = new Map<string, InvertedIndex>();
|
||||
|
||||
constructor(
|
||||
readonly databaseName: string,
|
||||
readonly schema: Schema
|
||||
) {
|
||||
for (const [key, type] of Object.entries(schema)) {
|
||||
const typeInfo = typeof type === 'string' ? { type } : type;
|
||||
if (typeInfo.index === false) {
|
||||
// If index is false, we don't need to create an inverted index for this field.
|
||||
continue;
|
||||
}
|
||||
if (typeInfo.type === 'String') {
|
||||
this.invertedIndex.set(key, new StringInvertedIndex(key));
|
||||
} else if (typeInfo.type === 'Integer') {
|
||||
this.invertedIndex.set(key, new IntegerInvertedIndex(key));
|
||||
} else if (typeInfo.type === 'FullText') {
|
||||
this.invertedIndex.set(key, new FullTextInvertedIndex(key));
|
||||
} else if (type === 'Boolean') {
|
||||
this.invertedIndex.set(key, new BooleanInvertedIndex(key));
|
||||
} else {
|
||||
throw new Error(`Field type '${type}' not supported`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async insert(trx: DataStructRWTransaction, document: Document) {
|
||||
const exists = await trx
|
||||
.objectStore('records')
|
||||
.index('id')
|
||||
.get(document.id);
|
||||
|
||||
if (exists) {
|
||||
throw new Error('Document already exists');
|
||||
}
|
||||
|
||||
const dataMap = new Map();
|
||||
|
||||
for (const [key, values] of document.fields) {
|
||||
const type = this.schema[key as string];
|
||||
if (!type) {
|
||||
continue;
|
||||
}
|
||||
const typeInfo = typeof type === 'string' ? { type } : type;
|
||||
if (typeInfo.store !== false) {
|
||||
// If store is false, the field will not be stored
|
||||
dataMap.set(key, values);
|
||||
}
|
||||
}
|
||||
|
||||
const nid = await trx.objectStore('records').put({
|
||||
id: document.id,
|
||||
data: dataMap,
|
||||
});
|
||||
|
||||
for (const [key, values] of document.fields) {
|
||||
const iidx = this.invertedIndex.get(key as string);
|
||||
if (!iidx) {
|
||||
continue;
|
||||
}
|
||||
await iidx.insert(trx, nid, values);
|
||||
}
|
||||
}
|
||||
|
||||
private async delete(trx: DataStructRWTransaction, id: string) {
|
||||
const nid = await trx.objectStore('records').index('id').getKey(id);
|
||||
|
||||
if (nid) {
|
||||
await trx.objectStore('records').delete(nid);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
const indexIds = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('nid')
|
||||
.getAllKeys(nid);
|
||||
|
||||
for (const indexId of indexIds) {
|
||||
await trx.objectStore('invertedIndex').delete(indexId);
|
||||
}
|
||||
}
|
||||
|
||||
async batchWrite(
|
||||
trx: DataStructRWTransaction,
|
||||
deletes: string[],
|
||||
inserts: Document[]
|
||||
) {
|
||||
const startTime = performance.now();
|
||||
try {
|
||||
for (const del of deletes) {
|
||||
await this.delete(trx, del);
|
||||
}
|
||||
for (const inst of inserts) {
|
||||
await this.insert(trx, inst);
|
||||
}
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Batch Write (${this.databaseName})`,
|
||||
{
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] batchWrite`,
|
||||
endTime - startTime,
|
||||
'ms'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async matchAll(trx: DataStructROTransaction): Promise<Match> {
|
||||
const allNids = await trx.objectStore('records').getAllKeys();
|
||||
const match = new Match();
|
||||
|
||||
for (const nid of allNids) {
|
||||
match.addScore(nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
private async queryRaw(
|
||||
trx: DataStructROTransaction,
|
||||
query: Query<any>
|
||||
): Promise<Match> {
|
||||
if (query.type === 'match') {
|
||||
const iidx = this.invertedIndex.get(query.field as string);
|
||||
if (!iidx) {
|
||||
return new Match();
|
||||
}
|
||||
return await iidx.match(trx, query.match);
|
||||
} else if (query.type === 'boolean') {
|
||||
const weights = [];
|
||||
for (const q of query.queries) {
|
||||
weights.push(await this.queryRaw(trx, q));
|
||||
}
|
||||
if (query.occur === 'must') {
|
||||
return weights.reduce((acc, w) => acc.and(w));
|
||||
} else if (query.occur === 'must_not') {
|
||||
const total = weights.reduce((acc, w) => acc.and(w));
|
||||
return (await this.matchAll(trx)).exclude(total);
|
||||
} else if (query.occur === 'should') {
|
||||
return weights.reduce((acc, w) => acc.or(w));
|
||||
}
|
||||
} else if (query.type === 'all') {
|
||||
return await this.matchAll(trx);
|
||||
} else if (query.type === 'boost') {
|
||||
return (await this.queryRaw(trx, query.query)).boost(query.boost);
|
||||
} else if (query.type === 'exists') {
|
||||
const iidx = this.invertedIndex.get(query.field as string);
|
||||
if (!iidx) {
|
||||
return new Match();
|
||||
}
|
||||
return await iidx.all(trx);
|
||||
}
|
||||
throw new Error(`Query type '${query.type}' not supported`);
|
||||
}
|
||||
|
||||
async clear(trx: DataStructRWTransaction) {
|
||||
await trx.objectStore('records').clear();
|
||||
await trx.objectStore('invertedIndex').clear();
|
||||
await trx.objectStore('kvMetadata').clear();
|
||||
}
|
||||
|
||||
async search(
|
||||
trx: DataStructROTransaction,
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any>
|
||||
): Promise<SearchResult<any, any>> {
|
||||
const startTime = performance.now();
|
||||
try {
|
||||
const pagination = {
|
||||
skip: options.pagination?.skip ?? 0,
|
||||
limit: options.pagination?.limit ?? 100,
|
||||
};
|
||||
|
||||
const match = await this.queryRaw(trx, query);
|
||||
|
||||
const nids = match
|
||||
.toArray()
|
||||
.slice(pagination.skip, pagination.skip + pagination.limit);
|
||||
|
||||
const nodes = [];
|
||||
for (const nid of nids) {
|
||||
const record = await trx.objectStore('records').get(nid);
|
||||
if (!record) {
|
||||
continue;
|
||||
}
|
||||
nodes.push(this.resultNode(record, options, match, nid));
|
||||
}
|
||||
|
||||
return {
|
||||
pagination: {
|
||||
count: match.size(),
|
||||
hasMore: match.size() > pagination.limit + pagination.skip,
|
||||
limit: pagination.limit,
|
||||
skip: pagination.skip,
|
||||
},
|
||||
nodes: nodes,
|
||||
};
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Search (${this.databaseName})`,
|
||||
{
|
||||
detail: { query, options },
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] search`,
|
||||
endTime - startTime,
|
||||
'ms',
|
||||
query
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async aggregate(
|
||||
trx: DataStructROTransaction,
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any>
|
||||
): Promise<AggregateResult<any, any>> {
|
||||
const startTime = performance.now();
|
||||
try {
|
||||
const pagination = {
|
||||
skip: options.pagination?.skip ?? 0,
|
||||
limit: options.pagination?.limit ?? 100,
|
||||
};
|
||||
|
||||
const hitPagination = options.hits
|
||||
? {
|
||||
skip: options.hits.pagination?.skip ?? 0,
|
||||
limit: options.hits.pagination?.limit ?? 3,
|
||||
}
|
||||
: {
|
||||
skip: 0,
|
||||
limit: 0,
|
||||
};
|
||||
|
||||
const match = await this.queryRaw(trx, query);
|
||||
|
||||
const nids = match.toArray();
|
||||
|
||||
const buckets: {
|
||||
key: string;
|
||||
nids: number[];
|
||||
hits: SearchResult<any, any>['nodes'];
|
||||
}[] = [];
|
||||
|
||||
for (const nid of nids) {
|
||||
const record = await trx.objectStore('records').get(nid);
|
||||
if (!record) {
|
||||
continue;
|
||||
}
|
||||
const values = record.data.get(field);
|
||||
for (const value of values ?? []) {
|
||||
let bucket;
|
||||
let bucketIndex = buckets.findIndex(b => b.key === value);
|
||||
if (bucketIndex === -1) {
|
||||
bucket = { key: value, nids: [], hits: [] };
|
||||
buckets.push(bucket);
|
||||
bucketIndex = buckets.length - 1;
|
||||
} else {
|
||||
bucket = buckets[bucketIndex];
|
||||
}
|
||||
|
||||
if (
|
||||
bucketIndex >= pagination.skip &&
|
||||
bucketIndex < pagination.skip + pagination.limit
|
||||
) {
|
||||
bucket.nids.push(nid);
|
||||
if (
|
||||
bucket.nids.length - 1 >= hitPagination.skip &&
|
||||
bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit
|
||||
) {
|
||||
bucket.hits.push(
|
||||
this.resultNode(record, options.hits ?? {}, match, nid)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
buckets: buckets
|
||||
.slice(pagination.skip, pagination.skip + pagination.limit)
|
||||
.map(bucket => {
|
||||
const result = {
|
||||
key: bucket.key,
|
||||
score: match.getScore(bucket.nids[0]),
|
||||
count: bucket.nids.length,
|
||||
} as AggregateResult<any, any>['buckets'][number];
|
||||
|
||||
if (options.hits) {
|
||||
(result as any).hits = {
|
||||
pagination: {
|
||||
count: bucket.nids.length,
|
||||
hasMore:
|
||||
bucket.nids.length >
|
||||
hitPagination.limit + hitPagination.skip,
|
||||
limit: hitPagination.limit,
|
||||
skip: hitPagination.skip,
|
||||
},
|
||||
nodes: bucket.hits,
|
||||
} as SearchResult<any, any>;
|
||||
}
|
||||
|
||||
return result;
|
||||
}),
|
||||
pagination: {
|
||||
count: buckets.length,
|
||||
hasMore: buckets.length > pagination.limit + pagination.skip,
|
||||
limit: pagination.limit,
|
||||
skip: pagination.skip,
|
||||
},
|
||||
};
|
||||
} finally {
|
||||
const endTime = performance.now();
|
||||
if (BUILD_CONFIG.debug) {
|
||||
performance.measure(
|
||||
`[IndexedDB Indexer] Aggregate (${this.databaseName})`,
|
||||
{
|
||||
detail: { query, field, options },
|
||||
start: startTime,
|
||||
end: endTime,
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.debug(
|
||||
`[indexer ${this.databaseName}] aggregate`,
|
||||
endTime - startTime,
|
||||
'ms'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async getAll(
|
||||
trx: DataStructROTransaction,
|
||||
ids?: string[]
|
||||
): Promise<Document[]> {
|
||||
const docs = [];
|
||||
if (ids) {
|
||||
for (const id of ids) {
|
||||
const record = await trx.objectStore('records').index('id').get(id);
|
||||
if (record) {
|
||||
docs.push(Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const records = await trx.objectStore('records').getAll();
|
||||
for (const record of records) {
|
||||
docs.push(Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
|
||||
return docs;
|
||||
}
|
||||
|
||||
async has(trx: DataStructROTransaction, id: string): Promise<boolean> {
|
||||
const nid = await trx.objectStore('records').index('id').getKey(id);
|
||||
return nid !== undefined;
|
||||
}
|
||||
|
||||
async readonly() {
|
||||
await this.ensureInitialized();
|
||||
return this.database.transaction(
|
||||
['records', 'invertedIndex', 'kvMetadata'],
|
||||
'readonly',
|
||||
{
|
||||
durability: 'relaxed',
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
async readwrite() {
|
||||
await this.ensureInitialized();
|
||||
return this.database.transaction(
|
||||
['records', 'invertedIndex', 'kvMetadata'],
|
||||
'readwrite',
|
||||
{
|
||||
durability: 'relaxed',
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private async ensureInitialized() {
|
||||
if (this.database) {
|
||||
return;
|
||||
}
|
||||
this.initializePromise ??= this.initialize();
|
||||
await this.initializePromise;
|
||||
}
|
||||
|
||||
private async initialize() {
|
||||
this.database = await openDB<IndexDB>(this.databaseName, 1, {
|
||||
upgrade(database) {
|
||||
database.createObjectStore('kvMetadata', {
|
||||
keyPath: 'key',
|
||||
});
|
||||
const recordsStore = database.createObjectStore('records', {
|
||||
autoIncrement: true,
|
||||
});
|
||||
recordsStore.createIndex('id', 'id', {
|
||||
unique: true,
|
||||
});
|
||||
const invertedIndexStore = database.createObjectStore('invertedIndex', {
|
||||
autoIncrement: true,
|
||||
});
|
||||
invertedIndexStore.createIndex('key', 'key', { unique: false });
|
||||
invertedIndexStore.createIndex('nid', 'nid', { unique: false });
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
private resultNode(
|
||||
record: { id: string; data: Map<string, string[]> },
|
||||
options: SearchOptions<any>,
|
||||
match?: Match,
|
||||
nid?: number
|
||||
): SearchResult<any, any>['nodes'][number] {
|
||||
const node = {
|
||||
id: record.id,
|
||||
score: match && nid ? match.getScore(nid) : 1,
|
||||
} as any;
|
||||
|
||||
if (options.fields) {
|
||||
const fields = {} as Record<string, string | string[]>;
|
||||
for (const field of options.fields as string[]) {
|
||||
fields[field] = record.data.get(field) ?? [''];
|
||||
if (fields[field].length === 1) {
|
||||
fields[field] = fields[field][0];
|
||||
}
|
||||
}
|
||||
node.fields = fields;
|
||||
}
|
||||
|
||||
if (match && nid && options.highlights) {
|
||||
const highlights = {} as Record<string, string[]>;
|
||||
for (const { field, before, end } of options.highlights) {
|
||||
const highlightValues = match.getHighlighters(nid, field);
|
||||
if (highlightValues) {
|
||||
const rawValues = record.data.get(field) ?? [];
|
||||
highlights[field] = Array.from(highlightValues)
|
||||
.map(([index, ranges]) => {
|
||||
const raw = rawValues[index];
|
||||
|
||||
if (raw) {
|
||||
return (
|
||||
highlighter(raw, before, end, ranges, {
|
||||
maxPrefix: 20,
|
||||
maxLength: 50,
|
||||
}) ?? ''
|
||||
);
|
||||
}
|
||||
|
||||
return '';
|
||||
})
|
||||
.filter(Boolean);
|
||||
}
|
||||
}
|
||||
node.highlights = highlights;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
export function highlighter(
|
||||
originText: string,
|
||||
before: string,
|
||||
after: string,
|
||||
matches: [number, number][],
|
||||
{
|
||||
maxLength = 50,
|
||||
maxPrefix = 20,
|
||||
}: { maxLength?: number; maxPrefix?: number } = {}
|
||||
) {
|
||||
const merged = mergeRanges(matches);
|
||||
|
||||
if (merged.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const firstMatch = merged[0][0];
|
||||
const start = Math.max(
|
||||
0,
|
||||
Math.min(firstMatch - maxPrefix, originText.length - maxLength)
|
||||
);
|
||||
const end = Math.min(start + maxLength, originText.length);
|
||||
const text = originText.substring(start, end);
|
||||
|
||||
let result = '';
|
||||
|
||||
let pointer = 0;
|
||||
for (const match of merged) {
|
||||
const matchStart = match[0] - start;
|
||||
const matchEnd = match[1] - start;
|
||||
if (matchStart >= text.length) {
|
||||
break;
|
||||
}
|
||||
result += text.substring(pointer, matchStart);
|
||||
pointer = matchStart;
|
||||
const highlighted = text.substring(matchStart, matchEnd);
|
||||
|
||||
if (highlighted.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
result += `${before}${highlighted}${after}`;
|
||||
pointer = matchEnd;
|
||||
}
|
||||
result += text.substring(pointer);
|
||||
|
||||
if (start > 0) {
|
||||
result = `...${result}`;
|
||||
}
|
||||
|
||||
if (end < originText.length) {
|
||||
result = `${result}...`;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
function mergeRanges(intervals: [number, number][]) {
|
||||
if (intervals.length === 0) return [];
|
||||
|
||||
intervals.sort((a, b) => a[0] - b[0]);
|
||||
|
||||
const merged = [intervals[0]];
|
||||
|
||||
for (let i = 1; i < intervals.length; i++) {
|
||||
const last = merged[merged.length - 1];
|
||||
const current = intervals[i];
|
||||
|
||||
if (current[0] <= last[1]) {
|
||||
last[1] = Math.max(last[1], current[1]);
|
||||
} else {
|
||||
merged.push(current);
|
||||
}
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
@@ -1,182 +0,0 @@
|
||||
import { DebugLogger } from '@affine/debug';
|
||||
import type { Observable } from 'rxjs';
|
||||
import { merge, of, Subject, throttleTime } from 'rxjs';
|
||||
|
||||
import { backoffRetry, fromPromise } from '../../../../livedata';
|
||||
import { exhaustMapWithTrailing } from '../../../../utils/';
|
||||
import {
|
||||
type AggregateOptions,
|
||||
type AggregateResult,
|
||||
type Document,
|
||||
type Index,
|
||||
type IndexStorage,
|
||||
type IndexWriter,
|
||||
type Query,
|
||||
type Schema,
|
||||
type SearchOptions,
|
||||
type SearchResult,
|
||||
} from '../../';
|
||||
import { DataStruct, type DataStructRWTransaction } from './data-struct';
|
||||
|
||||
const logger = new DebugLogger('IndexedDBIndex');
|
||||
|
||||
export class IndexedDBIndex<S extends Schema> implements Index<S> {
|
||||
data: DataStruct = new DataStruct(this.databaseName, this.schema);
|
||||
broadcast$ = new Subject();
|
||||
|
||||
constructor(
|
||||
private readonly schema: S,
|
||||
private readonly databaseName: string = 'indexer'
|
||||
) {
|
||||
const channel = new BroadcastChannel(this.databaseName + ':indexer');
|
||||
channel.onmessage = () => {
|
||||
this.broadcast$.next(1);
|
||||
};
|
||||
}
|
||||
|
||||
async get(id: string): Promise<Document<S> | null> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
async getAll(ids: string[]): Promise<Document<S>[]> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.getAll(trx, ids);
|
||||
}
|
||||
|
||||
async write(): Promise<IndexWriter<S>> {
|
||||
return new IndexedDBIndexWriter(this.data, await this.data.readwrite());
|
||||
}
|
||||
|
||||
async has(id: string): Promise<boolean> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.has(trx, id);
|
||||
}
|
||||
|
||||
async search(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Promise<SearchResult<any, SearchOptions<any>>> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.search(trx, query, options);
|
||||
}
|
||||
|
||||
search$(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Observable<SearchResult<any, SearchOptions<any>>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(3000, undefined, { leading: true, trailing: true }),
|
||||
exhaustMapWithTrailing(() => {
|
||||
return fromPromise(async () => {
|
||||
try {
|
||||
const trx = await this.data.readonly();
|
||||
return await this.data.search(trx, query, options);
|
||||
} catch (error) {
|
||||
logger.error('search error', error);
|
||||
throw error;
|
||||
}
|
||||
}).pipe(backoffRetry());
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
async aggregate(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Promise<AggregateResult<any, AggregateOptions<any>>> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.aggregate(trx, query, field, options);
|
||||
}
|
||||
|
||||
aggregate$(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Observable<AggregateResult<S, AggregateOptions<any>>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(3000, undefined, { leading: true, trailing: true }),
|
||||
exhaustMapWithTrailing(() => {
|
||||
return fromPromise(async () => {
|
||||
try {
|
||||
const trx = await this.data.readonly();
|
||||
return await this.data.aggregate(trx, query, field, options);
|
||||
} catch (error) {
|
||||
logger.error('aggregate error', error);
|
||||
throw error;
|
||||
}
|
||||
}).pipe(backoffRetry());
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
async clear(): Promise<void> {
|
||||
const trx = await this.data.readwrite();
|
||||
return this.data.clear(trx);
|
||||
}
|
||||
}
|
||||
|
||||
export class IndexedDBIndexWriter<S extends Schema> implements IndexWriter<S> {
|
||||
inserts: Document[] = [];
|
||||
deletes: string[] = [];
|
||||
channel = new BroadcastChannel(this.data.databaseName + ':indexer');
|
||||
|
||||
constructor(
|
||||
private readonly data: DataStruct,
|
||||
private readonly trx: DataStructRWTransaction
|
||||
) {}
|
||||
|
||||
async get(id: string): Promise<Document<S> | null> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
async getAll(ids?: string[]): Promise<Document<S>[]> {
|
||||
const trx = await this.data.readonly();
|
||||
return this.data.getAll(trx, ids);
|
||||
}
|
||||
|
||||
insert(document: Document): void {
|
||||
this.inserts.push(document);
|
||||
}
|
||||
delete(id: string): void {
|
||||
this.deletes.push(id);
|
||||
}
|
||||
put(document: Document): void {
|
||||
this.delete(document.id);
|
||||
this.insert(document);
|
||||
}
|
||||
|
||||
async commit(): Promise<void> {
|
||||
await this.data.batchWrite(this.trx, this.deletes, this.inserts);
|
||||
this.trx.commit();
|
||||
this.channel.postMessage(1);
|
||||
}
|
||||
|
||||
rollback(): void {}
|
||||
|
||||
has(id: string): Promise<boolean> {
|
||||
return this.data.has(this.trx, id);
|
||||
}
|
||||
|
||||
async search(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Promise<SearchResult<any, SearchOptions<any>>> {
|
||||
return this.data.search(this.trx, query, options);
|
||||
}
|
||||
|
||||
async aggregate(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Promise<AggregateResult<any, AggregateOptions<any>>> {
|
||||
return this.data.aggregate(this.trx, query, field, options);
|
||||
}
|
||||
}
|
||||
|
||||
export class IndexedDBIndexStorage implements IndexStorage {
|
||||
constructor(private readonly databaseName: string) {}
|
||||
getIndex<S extends Schema>(name: string, s: S): Index<S> {
|
||||
return new IndexedDBIndex(s, this.databaseName + ':' + name);
|
||||
}
|
||||
}
|
||||
@@ -1,469 +0,0 @@
|
||||
import { bm25 } from './bm25';
|
||||
import type {
|
||||
DataStructROTransaction,
|
||||
DataStructRWTransaction,
|
||||
} from './data-struct';
|
||||
import { Match } from './match';
|
||||
import { GeneralTokenizer, type Token } from './tokenizer';
|
||||
|
||||
export interface InvertedIndex {
|
||||
fieldKey: string;
|
||||
|
||||
match(trx: DataStructROTransaction, term: string): Promise<Match>;
|
||||
|
||||
all(trx: DataStructROTransaction): Promise<Match>;
|
||||
|
||||
insert(
|
||||
trx: DataStructRWTransaction,
|
||||
id: number,
|
||||
terms: string[]
|
||||
): Promise<void>;
|
||||
}
|
||||
|
||||
export class StringInvertedIndex implements InvertedIndex {
|
||||
constructor(
|
||||
readonly fieldKey: string,
|
||||
readonly index: boolean = true,
|
||||
readonly store: boolean = true
|
||||
) {}
|
||||
|
||||
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(InvertedIndexKey.forString(this.fieldKey, term).buffer());
|
||||
const match = new Match();
|
||||
for (const obj of objs) {
|
||||
match.addScore(obj.nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async all(trx: DataStructROTransaction): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
IDBKeyRange.bound(
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).buffer(),
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer()
|
||||
)
|
||||
);
|
||||
|
||||
const set = new Set<number>();
|
||||
for (const obj of objs) {
|
||||
set.add(obj.nid);
|
||||
}
|
||||
|
||||
const match = new Match();
|
||||
for (const nid of set) {
|
||||
match.addScore(nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
|
||||
for (const term of terms) {
|
||||
await trx.objectStore('invertedIndex').put({
|
||||
key: InvertedIndexKey.forString(this.fieldKey, term).buffer(),
|
||||
nid: id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class IntegerInvertedIndex implements InvertedIndex {
|
||||
constructor(
|
||||
readonly fieldKey: string,
|
||||
readonly index: boolean = true,
|
||||
readonly store: boolean = true
|
||||
) {}
|
||||
|
||||
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(InvertedIndexKey.forInt64(this.fieldKey, BigInt(term)).buffer());
|
||||
const match = new Match();
|
||||
for (const obj of objs) {
|
||||
match.addScore(obj.nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
async all(trx: DataStructROTransaction): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
IDBKeyRange.bound(
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).buffer(),
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer()
|
||||
)
|
||||
);
|
||||
|
||||
const set = new Set<number>();
|
||||
for (const obj of objs) {
|
||||
set.add(obj.nid);
|
||||
}
|
||||
|
||||
const match = new Match();
|
||||
for (const nid of set) {
|
||||
match.addScore(nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
|
||||
for (const term of terms) {
|
||||
await trx.objectStore('invertedIndex').put({
|
||||
key: InvertedIndexKey.forInt64(this.fieldKey, BigInt(term)).buffer(),
|
||||
nid: id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class BooleanInvertedIndex implements InvertedIndex {
|
||||
constructor(
|
||||
readonly fieldKey: string,
|
||||
readonly index: boolean = true,
|
||||
readonly store: boolean = true
|
||||
) {}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
async all(trx: DataStructROTransaction): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
IDBKeyRange.bound(
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).buffer(),
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer()
|
||||
)
|
||||
);
|
||||
|
||||
const set = new Set<number>();
|
||||
for (const obj of objs) {
|
||||
set.add(obj.nid);
|
||||
}
|
||||
|
||||
const match = new Match();
|
||||
for (const nid of set) {
|
||||
match.addScore(nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
InvertedIndexKey.forBoolean(this.fieldKey, term === 'true').buffer()
|
||||
);
|
||||
const match = new Match();
|
||||
for (const obj of objs) {
|
||||
match.addScore(obj.nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
|
||||
for (const term of terms) {
|
||||
await trx.objectStore('invertedIndex').put({
|
||||
key: InvertedIndexKey.forBoolean(
|
||||
this.fieldKey,
|
||||
term === 'true'
|
||||
).buffer(),
|
||||
nid: id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class FullTextInvertedIndex implements InvertedIndex {
|
||||
constructor(
|
||||
readonly fieldKey: string,
|
||||
readonly index: boolean = true,
|
||||
readonly store: boolean = true
|
||||
) {}
|
||||
|
||||
async match(trx: DataStructROTransaction, term: string): Promise<Match> {
|
||||
const queryTokens = new GeneralTokenizer().tokenize(term);
|
||||
const matched = new Map<
|
||||
number,
|
||||
Map<
|
||||
number, // index
|
||||
{
|
||||
score: number;
|
||||
ranges: [number, number][];
|
||||
}
|
||||
>
|
||||
>();
|
||||
const avgFieldLength =
|
||||
(
|
||||
await trx
|
||||
.objectStore('kvMetadata')
|
||||
.get(`full-text:avg-field-length:${this.fieldKey}`)
|
||||
)?.value ?? 0;
|
||||
for (const token of queryTokens) {
|
||||
const key = InvertedIndexKey.forString(this.fieldKey, token.term);
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
IDBKeyRange.bound(key.buffer(), key.add1().buffer(), false, true)
|
||||
);
|
||||
const submatched: {
|
||||
nid: number;
|
||||
score: number;
|
||||
position: {
|
||||
index: number;
|
||||
ranges: [number, number][];
|
||||
};
|
||||
}[] = [];
|
||||
for (const obj of objs) {
|
||||
const key = InvertedIndexKey.fromBuffer(obj.key);
|
||||
const originTokenTerm = key.asString();
|
||||
const matchLength = token.term.length;
|
||||
const position = obj.pos ?? {
|
||||
i: 0,
|
||||
l: 0,
|
||||
rs: [],
|
||||
};
|
||||
const termFreq = position.rs.length;
|
||||
const totalCount = objs.length;
|
||||
const fieldLength = position.l;
|
||||
const score =
|
||||
bm25(termFreq, 1, totalCount, fieldLength, avgFieldLength) *
|
||||
(matchLength / originTokenTerm.length);
|
||||
const match = {
|
||||
score,
|
||||
positions: new Map(),
|
||||
};
|
||||
const ranges = match.positions.get(position.i) || [];
|
||||
ranges.push(
|
||||
...position.rs.map(([start, _end]) => [start, start + matchLength])
|
||||
);
|
||||
match.positions.set(position.i, ranges);
|
||||
submatched.push({
|
||||
nid: obj.nid,
|
||||
score,
|
||||
position: {
|
||||
index: position.i,
|
||||
ranges: position.rs.map(([start, _end]) => [
|
||||
start,
|
||||
start + matchLength,
|
||||
]),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// normalize score
|
||||
const maxScore = submatched.reduce((acc, s) => Math.max(acc, s.score), 0);
|
||||
const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 0);
|
||||
for (const { nid, score, position } of submatched) {
|
||||
const normalizedScore =
|
||||
maxScore === minScore
|
||||
? score
|
||||
: (score - minScore) / (maxScore - minScore);
|
||||
const match =
|
||||
matched.get(nid) ??
|
||||
new Map<
|
||||
number, // index
|
||||
{
|
||||
score: number;
|
||||
ranges: [number, number][];
|
||||
}
|
||||
>();
|
||||
const item = match.get(position.index) || {
|
||||
score: 0,
|
||||
ranges: [],
|
||||
};
|
||||
item.score += normalizedScore;
|
||||
item.ranges.push(...position.ranges);
|
||||
match.set(position.index, item);
|
||||
matched.set(nid, match);
|
||||
}
|
||||
}
|
||||
const match = new Match();
|
||||
for (const [nid, items] of matched) {
|
||||
if (items.size === 0) {
|
||||
break;
|
||||
}
|
||||
let highestScore = -1;
|
||||
let highestIndex = -1;
|
||||
let highestRanges: [number, number][] = [];
|
||||
for (const [index, { score, ranges }] of items) {
|
||||
if (score > highestScore) {
|
||||
highestScore = score;
|
||||
highestIndex = index;
|
||||
highestRanges = ranges;
|
||||
}
|
||||
}
|
||||
match.addScore(nid, highestScore);
|
||||
match.addHighlighter(nid, this.fieldKey, highestIndex, highestRanges);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
async all(trx: DataStructROTransaction): Promise<Match> {
|
||||
const objs = await trx
|
||||
.objectStore('invertedIndex')
|
||||
.index('key')
|
||||
.getAll(
|
||||
IDBKeyRange.bound(
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).buffer(),
|
||||
InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer()
|
||||
)
|
||||
);
|
||||
|
||||
const set = new Set<number>();
|
||||
for (const obj of objs) {
|
||||
set.add(obj.nid);
|
||||
}
|
||||
|
||||
const match = new Match();
|
||||
for (const nid of set) {
|
||||
match.addScore(nid, 1);
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const tokenMap = new Map<string, Token[]>();
|
||||
const originString = terms[i];
|
||||
|
||||
const tokens = new GeneralTokenizer().tokenize(originString);
|
||||
|
||||
for (const token of tokens) {
|
||||
const tokens = tokenMap.get(token.term) || [];
|
||||
tokens.push(token);
|
||||
tokenMap.set(token.term, tokens);
|
||||
}
|
||||
|
||||
for (const [term, tokens] of tokenMap) {
|
||||
await trx.objectStore('invertedIndex').put({
|
||||
key: InvertedIndexKey.forString(this.fieldKey, term).buffer(),
|
||||
nid: id,
|
||||
pos: {
|
||||
l: originString.length,
|
||||
i: i,
|
||||
rs: tokens.map(token => [token.start, token.end]),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const kvMetadataStore = trx.objectStore('kvMetadata');
|
||||
// update avg-field-length
|
||||
const totalCount =
|
||||
(await kvMetadataStore.get(`full-text:field-count:${this.fieldKey}`))
|
||||
?.value ?? 0;
|
||||
const avgFieldLength =
|
||||
(
|
||||
await kvMetadataStore.get(
|
||||
`full-text:avg-field-length:${this.fieldKey}`
|
||||
)
|
||||
)?.value ?? 0;
|
||||
await kvMetadataStore.put({
|
||||
key: `full-text:field-count:${this.fieldKey}`,
|
||||
value: totalCount + 1,
|
||||
});
|
||||
await kvMetadataStore.put({
|
||||
key: `full-text:avg-field-length:${this.fieldKey}`,
|
||||
value:
|
||||
avgFieldLength +
|
||||
(terms.reduce((acc, term) => acc + term.length, 0) - avgFieldLength) /
|
||||
(totalCount + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class InvertedIndexKey {
|
||||
constructor(
|
||||
readonly field: Uint8Array,
|
||||
readonly value: Uint8Array,
|
||||
readonly gap: Uint8Array = new Uint8Array([58])
|
||||
) {}
|
||||
|
||||
asString() {
|
||||
return new TextDecoder().decode(this.value);
|
||||
}
|
||||
|
||||
asInt64() {
|
||||
return new DataView(this.value.buffer).getBigInt64(
|
||||
0,
|
||||
false
|
||||
); /* big-endian */
|
||||
}
|
||||
|
||||
add1() {
|
||||
if (this.value.byteLength > 0) {
|
||||
const bytes = new Uint8Array(this.value.slice(0));
|
||||
let carry = 1;
|
||||
for (let i = bytes.length - 1; i >= 0 && carry > 0; i--) {
|
||||
const sum = bytes[i] + carry;
|
||||
bytes[i] = sum % 256;
|
||||
carry = sum >> 8;
|
||||
}
|
||||
return new InvertedIndexKey(this.field, bytes);
|
||||
} else {
|
||||
return new InvertedIndexKey(
|
||||
this.field,
|
||||
new Uint8Array(0),
|
||||
new Uint8Array([59])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static forPrefix(field: string) {
|
||||
return new InvertedIndexKey(
|
||||
new TextEncoder().encode(field),
|
||||
new Uint8Array(0)
|
||||
);
|
||||
}
|
||||
|
||||
static forString(field: string, value: string) {
|
||||
return new InvertedIndexKey(
|
||||
new TextEncoder().encode(field),
|
||||
new TextEncoder().encode(value)
|
||||
);
|
||||
}
|
||||
|
||||
static forBoolean(field: string, value: boolean) {
|
||||
const bytes = new Uint8Array(1);
|
||||
bytes.set([value ? 1 : 0]);
|
||||
return new InvertedIndexKey(new TextEncoder().encode(field), bytes);
|
||||
}
|
||||
|
||||
static forInt64(field: string, value: bigint) {
|
||||
const bytes = new Uint8Array(8);
|
||||
new DataView(bytes.buffer).setBigInt64(0, value, false); /* big-endian */
|
||||
return new InvertedIndexKey(new TextEncoder().encode(field), bytes);
|
||||
}
|
||||
|
||||
buffer() {
|
||||
const tmp = new Uint8Array(
|
||||
this.field.byteLength + (this.value?.byteLength ?? 0) + 1
|
||||
);
|
||||
tmp.set(new Uint8Array(this.field), 0);
|
||||
tmp.set(new Uint8Array(this.gap), this.field.byteLength);
|
||||
if (this.value.byteLength > 0) {
|
||||
tmp.set(new Uint8Array(this.value), this.field.byteLength + 1);
|
||||
}
|
||||
return tmp.buffer;
|
||||
}
|
||||
|
||||
static fromBuffer(buffer: ArrayBuffer) {
|
||||
const array = new Uint8Array(buffer);
|
||||
const fieldLength = array.indexOf(58);
|
||||
const field = array.slice(0, fieldLength);
|
||||
const value = array.slice(fieldLength + 1);
|
||||
return new InvertedIndexKey(field, value);
|
||||
}
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
export class Match {
|
||||
scores = new Map<number, number>();
|
||||
/**
|
||||
* nid -> field -> index(multi value field) -> [start, end][]
|
||||
*/
|
||||
highlighters = new Map<
|
||||
number,
|
||||
Map<string, Map<number, [number, number][]>>
|
||||
>();
|
||||
|
||||
constructor() {}
|
||||
|
||||
size() {
|
||||
return this.scores.size;
|
||||
}
|
||||
|
||||
getScore(id: number) {
|
||||
return this.scores.get(id) ?? 0;
|
||||
}
|
||||
|
||||
addScore(id: number, score: number) {
|
||||
const currentScore = this.scores.get(id) || 0;
|
||||
this.scores.set(id, currentScore + score);
|
||||
}
|
||||
|
||||
getHighlighters(id: number, field: string) {
|
||||
return this.highlighters.get(id)?.get(field);
|
||||
}
|
||||
|
||||
addHighlighter(
|
||||
id: number,
|
||||
field: string,
|
||||
index: number,
|
||||
newRanges: [number, number][]
|
||||
) {
|
||||
const fields =
|
||||
this.highlighters.get(id) ||
|
||||
new Map<string, Map<number, [number, number][]>>();
|
||||
const values = fields.get(field) || new Map<number, [number, number][]>();
|
||||
const ranges = values.get(index) || [];
|
||||
ranges.push(...newRanges);
|
||||
values.set(index, ranges);
|
||||
fields.set(field, values);
|
||||
this.highlighters.set(id, fields);
|
||||
}
|
||||
|
||||
and(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (other.scores.has(id)) {
|
||||
newWeight.addScore(id, score + (other.scores.get(id) ?? 0));
|
||||
newWeight.copyExtData(this, id);
|
||||
newWeight.copyExtData(other, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
or(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
for (const [id, score] of other.scores) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(other, id);
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
exclude(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (!other.scores.has(id)) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
boost(boost: number) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
newWeight.addScore(id, score * boost);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
toArray() {
|
||||
return Array.from(this.scores.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.map(e => e[0]);
|
||||
}
|
||||
|
||||
private copyExtData(from: Match, id: number) {
|
||||
for (const [field, values] of from.highlighters.get(id) ?? []) {
|
||||
for (const [index, ranges] of values) {
|
||||
this.addHighlighter(id, field, index, ranges);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,162 +0,0 @@
|
||||
import Graphemer from 'graphemer';
|
||||
|
||||
export interface Tokenizer {
|
||||
tokenize(text: string): Token[];
|
||||
}
|
||||
|
||||
export interface Token {
|
||||
term: string;
|
||||
start: number;
|
||||
end: number;
|
||||
}
|
||||
|
||||
export class SimpleTokenizer implements Tokenizer {
|
||||
tokenize(text: string): Token[] {
|
||||
const tokens: Token[] = [];
|
||||
let start = 0;
|
||||
let end = 0;
|
||||
let inWord = false;
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const c = text[i];
|
||||
if (c.match(/[\n\r\p{Z}\p{P}]/u)) {
|
||||
if (inWord) {
|
||||
end = i;
|
||||
tokens.push({
|
||||
term: text.substring(start, end).toLowerCase(),
|
||||
start,
|
||||
end,
|
||||
});
|
||||
inWord = false;
|
||||
}
|
||||
} else {
|
||||
if (!inWord) {
|
||||
start = i;
|
||||
end = i;
|
||||
inWord = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (inWord) {
|
||||
tokens.push({
|
||||
term: text.substring(start).toLowerCase(),
|
||||
start,
|
||||
end: text.length,
|
||||
});
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
|
||||
export class NGramTokenizer implements Tokenizer {
|
||||
constructor(private readonly n: number) {}
|
||||
|
||||
tokenize(text: string): Token[] {
|
||||
const splitted: Token[] = [];
|
||||
for (let i = 0; i < text.length; ) {
|
||||
const nextBreak = Graphemer.nextBreak(text, i);
|
||||
const c = text.substring(i, nextBreak);
|
||||
|
||||
splitted.push({
|
||||
term: c,
|
||||
start: i,
|
||||
end: nextBreak,
|
||||
});
|
||||
|
||||
i = nextBreak;
|
||||
}
|
||||
const tokens: Token[] = [];
|
||||
for (let i = 0; i < splitted.length - this.n + 1; i++) {
|
||||
tokens.push(
|
||||
splitted.slice(i, i + this.n).reduce(
|
||||
(acc, t) => ({
|
||||
term: acc.term + t.term,
|
||||
start: Math.min(acc.start, t.start),
|
||||
end: Math.max(acc.end, t.end),
|
||||
}),
|
||||
{ term: '', start: Infinity, end: -Infinity }
|
||||
)
|
||||
);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
|
||||
export class GeneralTokenizer implements Tokenizer {
|
||||
constructor() {}
|
||||
|
||||
tokenizeWord(word: string, lang: string): Token[] {
|
||||
if (lang === 'en') {
|
||||
return [{ term: word.toLowerCase(), start: 0, end: word.length }];
|
||||
} else if (lang === 'cjk') {
|
||||
if (word.length < 3) {
|
||||
return [{ term: word, start: 0, end: word.length }];
|
||||
}
|
||||
return new NGramTokenizer(2).tokenize(word);
|
||||
} else if (lang === 'emoji') {
|
||||
return new NGramTokenizer(1).tokenize(word);
|
||||
} else if (lang === '-') {
|
||||
return [];
|
||||
}
|
||||
|
||||
throw new Error('Not implemented');
|
||||
}
|
||||
|
||||
testLang(c: string): string {
|
||||
if (c.match(/[\p{Emoji}]/u)) {
|
||||
return 'emoji';
|
||||
} else if (c.match(/[\p{sc=Han}\p{scx=Hira}\p{scx=Kana}\p{sc=Hang}]/u)) {
|
||||
return 'cjk';
|
||||
} else if (c.match(/[\n\r\p{Z}\p{P}]/u)) {
|
||||
return '-';
|
||||
} else {
|
||||
return 'en';
|
||||
}
|
||||
}
|
||||
|
||||
tokenize(text: string): Token[] {
|
||||
const tokens: Token[] = [];
|
||||
let start = 0;
|
||||
let end = 0;
|
||||
let lang: string | null = null;
|
||||
|
||||
for (let i = 0; i < text.length; ) {
|
||||
const nextBreak = Graphemer.nextBreak(text, i);
|
||||
const c = text.substring(i, nextBreak);
|
||||
|
||||
const l = this.testLang(c);
|
||||
if (lang !== l) {
|
||||
if (lang !== null) {
|
||||
end = i;
|
||||
tokens.push(
|
||||
...this.tokenizeWord(text.substring(start, end), lang).map(
|
||||
token => ({
|
||||
...token,
|
||||
start: token.start + start,
|
||||
end: token.end + start,
|
||||
})
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
start = i;
|
||||
end = i;
|
||||
lang = l;
|
||||
}
|
||||
|
||||
i = nextBreak;
|
||||
}
|
||||
if (lang !== null) {
|
||||
tokens.push(
|
||||
...this.tokenizeWord(text.substring(start, text.length), lang).map(
|
||||
token => ({
|
||||
...token,
|
||||
start: token.start + start,
|
||||
end: token.end + start,
|
||||
})
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
@@ -1,290 +0,0 @@
|
||||
import {
|
||||
type AggregateOptions,
|
||||
type AggregateResult,
|
||||
Document,
|
||||
type Query,
|
||||
type Schema,
|
||||
type SearchOptions,
|
||||
type SearchResult,
|
||||
} from '../../';
|
||||
import {
|
||||
BooleanInvertedIndex,
|
||||
FullTextInvertedIndex,
|
||||
IntegerInvertedIndex,
|
||||
type InvertedIndex,
|
||||
StringInvertedIndex,
|
||||
} from './inverted-index';
|
||||
import { Match } from './match';
|
||||
|
||||
type DataRecord = {
|
||||
id: string;
|
||||
data: Map<string, string[]>;
|
||||
deleted: boolean;
|
||||
};
|
||||
|
||||
export class DataStruct {
|
||||
records: DataRecord[] = [];
|
||||
|
||||
idMap = new Map<string, number>();
|
||||
|
||||
invertedIndex = new Map<string, InvertedIndex>();
|
||||
|
||||
constructor(schema: Schema) {
|
||||
for (const [key, type] of Object.entries(schema)) {
|
||||
const typeInfo = typeof type === 'string' ? { type } : type;
|
||||
|
||||
if (typeInfo.type === 'String') {
|
||||
this.invertedIndex.set(key, new StringInvertedIndex(key));
|
||||
} else if (typeInfo.type === 'Integer') {
|
||||
this.invertedIndex.set(key, new IntegerInvertedIndex(key));
|
||||
} else if (typeInfo.type === 'FullText') {
|
||||
this.invertedIndex.set(key, new FullTextInvertedIndex(key));
|
||||
} else if (typeInfo.type === 'Boolean') {
|
||||
this.invertedIndex.set(key, new BooleanInvertedIndex(key));
|
||||
} else {
|
||||
throw new Error(`Field type '${type}' not supported`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getAll(ids?: string[]): Document[] {
|
||||
if (ids) {
|
||||
return ids
|
||||
.map(id => {
|
||||
const nid = this.idMap.get(id);
|
||||
if (nid === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
return Document.from(id, this.records[nid].data);
|
||||
})
|
||||
.filter((v): v is Document => v !== undefined);
|
||||
} else {
|
||||
return this.records
|
||||
.filter(record => !record.deleted)
|
||||
.map(record => Document.from(record.id, record.data));
|
||||
}
|
||||
}
|
||||
|
||||
insert(document: Document) {
|
||||
if (this.idMap.has(document.id)) {
|
||||
throw new Error('Document already exists');
|
||||
}
|
||||
|
||||
this.records.push({
|
||||
id: document.id,
|
||||
data: document.fields as Map<string, string[]>,
|
||||
deleted: false,
|
||||
});
|
||||
|
||||
const nid = this.records.length - 1;
|
||||
this.idMap.set(document.id, nid);
|
||||
for (const [key, values] of document.fields) {
|
||||
for (const value of values) {
|
||||
const iidx = this.invertedIndex.get(key as string);
|
||||
if (!iidx) {
|
||||
throw new Error(
|
||||
`Inverted index '${key.toString()}' not found, document does not match schema`
|
||||
);
|
||||
}
|
||||
iidx.insert(nid, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete(id: string) {
|
||||
const nid = this.idMap.get(id);
|
||||
if (nid === undefined) {
|
||||
throw new Error('Document not found');
|
||||
}
|
||||
|
||||
this.records[nid].deleted = true;
|
||||
this.records[nid].data = new Map();
|
||||
}
|
||||
|
||||
matchAll(): Match {
|
||||
const weight = new Match();
|
||||
for (let i = 0; i < this.records.length; i++) {
|
||||
weight.addScore(i, 1);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.records = [];
|
||||
this.idMap.clear();
|
||||
this.invertedIndex.forEach(v => v.clear());
|
||||
}
|
||||
|
||||
private queryRaw(query: Query<any>): Match {
|
||||
if (query.type === 'match') {
|
||||
const iidx = this.invertedIndex.get(query.field as string);
|
||||
if (!iidx) {
|
||||
throw new Error(`Field '${query.field as string}' not found`);
|
||||
}
|
||||
return iidx.match(query.match);
|
||||
} else if (query.type === 'boolean') {
|
||||
const weights = query.queries.map(q => this.queryRaw(q));
|
||||
if (query.occur === 'must') {
|
||||
return weights.reduce((acc, w) => acc.and(w));
|
||||
} else if (query.occur === 'must_not') {
|
||||
const total = weights.reduce((acc, w) => acc.and(w));
|
||||
return this.matchAll().exclude(total);
|
||||
} else if (query.occur === 'should') {
|
||||
return weights.reduce((acc, w) => acc.or(w));
|
||||
}
|
||||
} else if (query.type === 'all') {
|
||||
return this.matchAll();
|
||||
} else if (query.type === 'boost') {
|
||||
return this.queryRaw(query.query).boost(query.boost);
|
||||
} else if (query.type === 'exists') {
|
||||
const iidx = this.invertedIndex.get(query.field as string);
|
||||
if (!iidx) {
|
||||
throw new Error(`Field '${query.field as string}' not found`);
|
||||
}
|
||||
return iidx.all();
|
||||
}
|
||||
throw new Error(`Query type '${query.type}' not supported`);
|
||||
}
|
||||
|
||||
query(query: Query<any>): Match {
|
||||
return this.queryRaw(query).filter(id => !this.records[id].deleted);
|
||||
}
|
||||
|
||||
search(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): SearchResult<any, any> {
|
||||
const pagination = {
|
||||
skip: options.pagination?.skip ?? 0,
|
||||
limit: options.pagination?.limit ?? 100,
|
||||
};
|
||||
|
||||
const match = this.query(query);
|
||||
|
||||
const nids = match
|
||||
.toArray()
|
||||
.slice(pagination.skip, pagination.skip + pagination.limit);
|
||||
|
||||
return {
|
||||
pagination: {
|
||||
count: match.size(),
|
||||
hasMore: match.size() > pagination.limit + pagination.skip,
|
||||
limit: pagination.limit,
|
||||
skip: pagination.skip,
|
||||
},
|
||||
nodes: nids.map(nid => this.resultNode(match, nid, options)),
|
||||
};
|
||||
}
|
||||
|
||||
aggregate(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): AggregateResult<any, any> {
|
||||
const pagination = {
|
||||
skip: options.pagination?.skip ?? 0,
|
||||
limit: options.pagination?.limit ?? 100,
|
||||
};
|
||||
|
||||
const match = this.query(query);
|
||||
|
||||
const nids = match.toArray();
|
||||
|
||||
const buckets: { key: string; nids: number[] }[] = [];
|
||||
|
||||
for (const nid of nids) {
|
||||
for (const value of this.records[nid].data.get(field) ?? []) {
|
||||
let bucket = buckets.find(b => b.key === value);
|
||||
if (!bucket) {
|
||||
bucket = { key: value, nids: [] };
|
||||
buckets.push(bucket);
|
||||
}
|
||||
bucket.nids.push(nid);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
buckets: buckets
|
||||
.slice(pagination.skip, pagination.skip + pagination.limit)
|
||||
.map(bucket => {
|
||||
const result = {
|
||||
key: bucket.key,
|
||||
score: match.getScore(bucket.nids[0]),
|
||||
count: bucket.nids.length,
|
||||
} as AggregateResult<any, any>['buckets'][number];
|
||||
|
||||
if (options.hits) {
|
||||
const hitsOptions = options.hits;
|
||||
const pagination = {
|
||||
skip: options.hits.pagination?.skip ?? 0,
|
||||
limit: options.hits.pagination?.limit ?? 3,
|
||||
};
|
||||
|
||||
const hits = bucket.nids.slice(
|
||||
pagination.skip,
|
||||
pagination.skip + pagination.limit
|
||||
);
|
||||
|
||||
(result as any).hits = {
|
||||
pagination: {
|
||||
count: bucket.nids.length,
|
||||
hasMore:
|
||||
bucket.nids.length > pagination.limit + pagination.skip,
|
||||
limit: pagination.limit,
|
||||
skip: pagination.skip,
|
||||
},
|
||||
nodes: hits.map(nid => this.resultNode(match, nid, hitsOptions)),
|
||||
} as SearchResult<any, any>;
|
||||
}
|
||||
|
||||
return result;
|
||||
}),
|
||||
pagination: {
|
||||
count: buckets.length,
|
||||
hasMore: buckets.length > pagination.limit + pagination.skip,
|
||||
limit: pagination.limit,
|
||||
skip: pagination.skip,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
has(id: string): boolean {
|
||||
return this.idMap.has(id);
|
||||
}
|
||||
|
||||
private resultNode(
|
||||
match: Match,
|
||||
nid: number,
|
||||
options: SearchOptions<any>
|
||||
): SearchResult<any, any>['nodes'][number] {
|
||||
const node = {
|
||||
id: this.records[nid].id,
|
||||
score: match.getScore(nid),
|
||||
} as any;
|
||||
|
||||
if (options.fields) {
|
||||
const fields = {} as Record<string, string | string[]>;
|
||||
for (const field of options.fields as string[]) {
|
||||
fields[field] = this.records[nid].data.get(field) ?? [''];
|
||||
if (fields[field].length === 1) {
|
||||
fields[field] = fields[field][0];
|
||||
}
|
||||
}
|
||||
node.fields = fields;
|
||||
}
|
||||
|
||||
if (options.highlights) {
|
||||
const highlights = {} as Record<string, string[]>;
|
||||
for (const { field, before, end } of options.highlights) {
|
||||
highlights[field] = match
|
||||
.getHighlighters(nid, field)
|
||||
.flatMap(highlighter => {
|
||||
return highlighter(before, end);
|
||||
});
|
||||
}
|
||||
node.highlights = highlights;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
}
|
||||
@@ -1,141 +0,0 @@
|
||||
import { map, merge, type Observable, of, Subject, throttleTime } from 'rxjs';
|
||||
|
||||
import type {
|
||||
AggregateOptions,
|
||||
AggregateResult,
|
||||
Document,
|
||||
Index,
|
||||
IndexStorage,
|
||||
IndexWriter,
|
||||
Query,
|
||||
Schema,
|
||||
SearchOptions,
|
||||
SearchResult,
|
||||
} from '../../';
|
||||
import { DataStruct } from './data-struct';
|
||||
|
||||
export class MemoryIndex<S extends Schema> implements Index<S> {
|
||||
private readonly data: DataStruct = new DataStruct(this.schema);
|
||||
broadcast$ = new Subject<number>();
|
||||
|
||||
constructor(private readonly schema: Schema) {}
|
||||
|
||||
write(): Promise<IndexWriter<S>> {
|
||||
return Promise.resolve(new MemoryIndexWriter(this.data, this.broadcast$));
|
||||
}
|
||||
|
||||
async get(id: string): Promise<Document<S> | null> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
getAll(ids?: string[]): Promise<Document<S>[]> {
|
||||
return Promise.resolve(this.data.getAll(ids));
|
||||
}
|
||||
|
||||
has(id: string): Promise<boolean> {
|
||||
return Promise.resolve(this.data.has(id));
|
||||
}
|
||||
|
||||
async search(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Promise<SearchResult<any, any>> {
|
||||
return this.data.search(query, options);
|
||||
}
|
||||
|
||||
search$(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Observable<SearchResult<any, any>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(500, undefined, { leading: false, trailing: true }),
|
||||
map(() => this.data.search(query, options))
|
||||
);
|
||||
}
|
||||
|
||||
async aggregate(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Promise<AggregateResult<any, any>> {
|
||||
return this.data.aggregate(query, field, options);
|
||||
}
|
||||
|
||||
aggregate$(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Observable<AggregateResult<S, AggregateOptions<any>>> {
|
||||
return merge(of(1), this.broadcast$).pipe(
|
||||
throttleTime(500, undefined, { leading: false, trailing: true }),
|
||||
map(() => this.data.aggregate(query, field, options))
|
||||
);
|
||||
}
|
||||
|
||||
clear(): Promise<void> {
|
||||
this.data.clear();
|
||||
return Promise.resolve();
|
||||
}
|
||||
}
|
||||
|
||||
export class MemoryIndexWriter<S extends Schema> implements IndexWriter<S> {
|
||||
inserts: Document[] = [];
|
||||
deletes: string[] = [];
|
||||
|
||||
constructor(
|
||||
private readonly data: DataStruct,
|
||||
private readonly broadcast$: Subject<number>
|
||||
) {}
|
||||
|
||||
async get(id: string): Promise<Document<S> | null> {
|
||||
return (await this.getAll([id]))[0] ?? null;
|
||||
}
|
||||
|
||||
getAll(ids: string[]): Promise<Document<S>[]> {
|
||||
return Promise.resolve(this.data.getAll(ids));
|
||||
}
|
||||
|
||||
insert(document: Document): void {
|
||||
this.inserts.push(document);
|
||||
}
|
||||
delete(id: string): void {
|
||||
this.deletes.push(id);
|
||||
}
|
||||
put(document: Document): void {
|
||||
this.delete(document.id);
|
||||
this.insert(document);
|
||||
}
|
||||
async search(
|
||||
query: Query<any>,
|
||||
options: SearchOptions<any> = {}
|
||||
): Promise<SearchResult<any, any>> {
|
||||
return this.data.search(query, options);
|
||||
}
|
||||
async aggregate(
|
||||
query: Query<any>,
|
||||
field: string,
|
||||
options: AggregateOptions<any> = {}
|
||||
): Promise<AggregateResult<any, any>> {
|
||||
return this.data.aggregate(query, field, options);
|
||||
}
|
||||
commit(): Promise<void> {
|
||||
for (const del of this.deletes) {
|
||||
this.data.delete(del);
|
||||
}
|
||||
for (const inst of this.inserts) {
|
||||
this.data.insert(inst);
|
||||
}
|
||||
this.broadcast$.next(1);
|
||||
return Promise.resolve();
|
||||
}
|
||||
rollback(): void {}
|
||||
has(id: string): Promise<boolean> {
|
||||
return Promise.resolve(this.data.has(id));
|
||||
}
|
||||
}
|
||||
|
||||
export class MemoryIndexStorage implements IndexStorage {
|
||||
getIndex<S extends Schema>(_: string, schema: S): Index<S> {
|
||||
return new MemoryIndex(schema);
|
||||
}
|
||||
}
|
||||
@@ -1,220 +0,0 @@
|
||||
import Fuse from 'fuse.js';
|
||||
|
||||
import { Match } from './match';
|
||||
|
||||
export interface InvertedIndex {
|
||||
fieldKey: string;
|
||||
|
||||
match(term: string): Match;
|
||||
|
||||
all(): Match;
|
||||
|
||||
insert(id: number, term: string): void;
|
||||
|
||||
clear(): void;
|
||||
}
|
||||
|
||||
export class StringInvertedIndex implements InvertedIndex {
|
||||
index: Map<string, number[]> = new Map();
|
||||
|
||||
constructor(readonly fieldKey: string) {}
|
||||
|
||||
match(term: string): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const id of this.index.get(term) ?? []) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
all(): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const [_term, ids] of this.index) {
|
||||
for (const id of ids) {
|
||||
if (match.getScore(id) === 0) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
insert(id: number, term: string): void {
|
||||
const ids = this.index.get(term) ?? [];
|
||||
ids.push(id);
|
||||
this.index.set(term, ids);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.index.clear();
|
||||
}
|
||||
}
|
||||
|
||||
export class IntegerInvertedIndex implements InvertedIndex {
|
||||
index: Map<string, number[]> = new Map();
|
||||
|
||||
constructor(readonly fieldKey: string) {}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
match(term: string): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const id of this.index.get(term) ?? []) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
all(): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const [_term, ids] of this.index) {
|
||||
for (const id of ids) {
|
||||
if (match.getScore(id) === 0) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
insert(id: number, term: string): void {
|
||||
const ids = this.index.get(term) ?? [];
|
||||
ids.push(id);
|
||||
this.index.set(term, ids);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.index.clear();
|
||||
}
|
||||
}
|
||||
|
||||
export class BooleanInvertedIndex implements InvertedIndex {
|
||||
index: Map<boolean, number[]> = new Map();
|
||||
|
||||
constructor(readonly fieldKey: string) {}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
match(term: string): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const id of this.index.get(term === 'true') ?? []) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
all(): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const [_term, ids] of this.index) {
|
||||
for (const id of ids) {
|
||||
if (match.getScore(id) === 0) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
insert(id: number, term: string): void {
|
||||
const ids = this.index.get(term === 'true') ?? [];
|
||||
ids.push(id);
|
||||
this.index.set(term === 'true', ids);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.index.clear();
|
||||
}
|
||||
}
|
||||
|
||||
export class FullTextInvertedIndex implements InvertedIndex {
|
||||
records = [] as { id: number; v: string }[];
|
||||
index = Fuse.createIndex(['v'], [] as { id: number; v: string }[]);
|
||||
|
||||
constructor(readonly fieldKey: string) {}
|
||||
|
||||
match(term: string): Match {
|
||||
const searcher = new Fuse(
|
||||
this.records,
|
||||
{
|
||||
includeScore: true,
|
||||
includeMatches: true,
|
||||
shouldSort: true,
|
||||
keys: ['v'],
|
||||
},
|
||||
this.index
|
||||
);
|
||||
const result = searcher.search(term);
|
||||
|
||||
const match = new Match();
|
||||
|
||||
for (const value of result) {
|
||||
match.addScore(value.item.id, 1 - (value.score ?? 1));
|
||||
|
||||
match.addHighlighter(value.item.id, this.fieldKey, (before, after) => {
|
||||
const matches = value.matches;
|
||||
if (!matches || matches.length === 0) {
|
||||
return [''];
|
||||
}
|
||||
|
||||
const firstMatch = matches[0];
|
||||
|
||||
const text = firstMatch.value;
|
||||
if (!text) {
|
||||
return [''];
|
||||
}
|
||||
|
||||
let result = '';
|
||||
let pointer = 0;
|
||||
for (const match of matches) {
|
||||
for (const [start, end] of match.indices) {
|
||||
result += text.substring(pointer, start);
|
||||
result += `${before}${text.substring(start, end + 1)}${after}`;
|
||||
pointer = end + 1;
|
||||
}
|
||||
}
|
||||
result += text.substring(pointer);
|
||||
|
||||
return [result];
|
||||
});
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
all(): Match {
|
||||
const match = new Match();
|
||||
|
||||
for (const { id } of this.records) {
|
||||
if (match.getScore(id) === 0) {
|
||||
match.addScore(id, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
insert(id: number, term: string): void {
|
||||
this.index.add({ id, v: term });
|
||||
this.records.push({ id, v: term });
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.records = [];
|
||||
this.index = Fuse.createIndex(['v'], [] as { id: number; v: string }[]);
|
||||
}
|
||||
}
|
||||
@@ -1,108 +0,0 @@
|
||||
export class Match {
|
||||
scores = new Map<number, number>();
|
||||
highlighters = new Map<
|
||||
number,
|
||||
Map<string, ((before: string, after: string) => string[])[]>
|
||||
>();
|
||||
|
||||
constructor() {}
|
||||
|
||||
size() {
|
||||
return this.scores.size;
|
||||
}
|
||||
|
||||
getScore(id: number) {
|
||||
return this.scores.get(id) ?? 0;
|
||||
}
|
||||
|
||||
addScore(id: number, score: number) {
|
||||
const currentScore = this.scores.get(id) || 0;
|
||||
this.scores.set(id, currentScore + score);
|
||||
}
|
||||
|
||||
getHighlighters(id: number, field: string) {
|
||||
return this.highlighters.get(id)?.get(field) ?? [];
|
||||
}
|
||||
|
||||
addHighlighter(
|
||||
id: number,
|
||||
field: string,
|
||||
highlighter: (before: string, after: string) => string[]
|
||||
) {
|
||||
const fields = this.highlighters.get(id) || new Map();
|
||||
const highlighters = fields.get(field) || [];
|
||||
highlighters.push(highlighter);
|
||||
fields.set(field, highlighters);
|
||||
this.highlighters.set(id, fields);
|
||||
}
|
||||
|
||||
and(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (other.scores.has(id)) {
|
||||
newWeight.addScore(id, score + (other.scores.get(id) ?? 0));
|
||||
newWeight.copyExtData(this, id);
|
||||
newWeight.copyExtData(other, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
or(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
for (const [id, score] of other.scores) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(other, id);
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
exclude(other: Match) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (!other.scores.has(id)) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
boost(boost: number) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
newWeight.addScore(id, score * boost);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
toArray() {
|
||||
return Array.from(this.scores.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.map(e => e[0]);
|
||||
}
|
||||
|
||||
filter(predicate: (id: number) => boolean) {
|
||||
const newWeight = new Match();
|
||||
for (const [id, score] of this.scores) {
|
||||
if (predicate(id)) {
|
||||
newWeight.addScore(id, score);
|
||||
newWeight.copyExtData(this, id);
|
||||
}
|
||||
}
|
||||
return newWeight;
|
||||
}
|
||||
|
||||
private copyExtData(from: Match, id: number) {
|
||||
for (const [field, highlighters] of from.highlighters.get(id) ?? []) {
|
||||
for (const highlighter of highlighters) {
|
||||
this.addHighlighter(id, field, highlighter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
export * from './document';
export * from './field-type';
export * from './indexer';
export * from './query';
export * from './schema';
export * from './searcher';
@@ -1,41 +0,0 @@
|
||||
import type { Document } from './document';
|
||||
import type { Schema } from './schema';
|
||||
import type { Searcher, Subscriber } from './searcher';
|
||||
|
||||
export interface Index<S extends Schema>
|
||||
extends IndexReader<S>,
|
||||
Searcher<S>,
|
||||
Subscriber<S> {
|
||||
write(): Promise<IndexWriter<S>>;
|
||||
|
||||
clear(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface IndexWriter<S extends Schema>
|
||||
extends IndexReader<S>,
|
||||
Searcher<S> {
|
||||
insert(document: Document<S>): void;
|
||||
|
||||
put(document: Document<S>): void;
|
||||
|
||||
delete(id: string): void;
|
||||
|
||||
// TODO(@eyhn)
|
||||
// deleteByQuery(query: Query<S>): void;
|
||||
|
||||
commit(): Promise<void>;
|
||||
|
||||
rollback(): void;
|
||||
}
|
||||
|
||||
export interface IndexReader<S extends Schema> {
|
||||
get(id: string): Promise<Document<S> | null>;
|
||||
|
||||
getAll(ids?: string[]): Promise<Document<S>[]>;
|
||||
|
||||
has(id: string): Promise<boolean>;
|
||||
}
|
||||
|
||||
export interface IndexStorage {
|
||||
getIndex<S extends Schema>(name: string, schema: S): Index<S>;
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
import type { Schema } from './schema';
|
||||
|
||||
export type MatchQuery<S extends Schema> = {
|
||||
type: 'match';
|
||||
field: keyof S;
|
||||
match: string;
|
||||
};
|
||||
|
||||
export type BoostQuery = {
|
||||
type: 'boost';
|
||||
query: Query<any>;
|
||||
boost: number;
|
||||
};
|
||||
|
||||
export type BooleanQuery<S extends Schema> = {
|
||||
type: 'boolean';
|
||||
occur: 'should' | 'must' | 'must_not';
|
||||
queries: Query<S>[];
|
||||
};
|
||||
|
||||
export type ExistsQuery<S extends Schema> = {
|
||||
type: 'exists';
|
||||
field: keyof S;
|
||||
};
|
||||
|
||||
export type AllQuery = {
|
||||
type: 'all';
|
||||
};
|
||||
|
||||
export type Query<S extends Schema> =
|
||||
| BooleanQuery<S>
|
||||
| MatchQuery<S>
|
||||
| AllQuery
|
||||
| ExistsQuery<S>
|
||||
| BoostQuery;
|
||||
@@ -1,25 +0,0 @@
import type { FieldType } from './field-type';

export type Schema = Record<
  string,
  | FieldType
  | {
      type: FieldType;
      /**
       * If false, the field will not be indexed, and thus not searchable.
       *
       * default: true
       */
      index?: boolean;
      /**
       * If false, the field will not be stored, and not included in the search result.
       *
       * default: true
       */
      store?: boolean;
    }
>;

export function defineSchema<T extends Schema>(schema: T): T {
  return schema;
}
@@ -1,83 +0,0 @@
|
||||
import type { Observable } from 'rxjs';
|
||||
|
||||
import type { Query } from './query';
|
||||
import type { Schema } from './schema';
|
||||
|
||||
type HighlightAbleField<S extends Schema> = {
|
||||
[K in keyof S]: S[K] extends 'FullText' ? K : never;
|
||||
}[keyof S];
|
||||
|
||||
export interface Searcher<S extends Schema = any> {
|
||||
search<const O extends SearchOptions<S>>(
|
||||
query: Query<S>,
|
||||
options?: O
|
||||
): Promise<SearchResult<S, O>>;
|
||||
aggregate<const O extends AggregateOptions<S>>(
|
||||
query: Query<S>,
|
||||
field: keyof S,
|
||||
options?: O
|
||||
): Promise<AggregateResult<S, O>>;
|
||||
}
|
||||
|
||||
export interface Subscriber<S extends Schema = any> {
|
||||
search$<const O extends SearchOptions<S>>(
|
||||
query: Query<S>,
|
||||
options?: O
|
||||
): Observable<SearchResult<S, O>>;
|
||||
aggregate$<const O extends AggregateOptions<S>>(
|
||||
query: Query<S>,
|
||||
field: keyof S,
|
||||
options?: O
|
||||
): Observable<AggregateResult<S, O>>;
|
||||
}
|
||||
|
||||
type ResultPagination = {
|
||||
count: number;
|
||||
limit: number;
|
||||
skip: number;
|
||||
hasMore: boolean;
|
||||
};
|
||||
|
||||
type PaginationOption = {
|
||||
limit?: number;
|
||||
skip?: number;
|
||||
};
|
||||
|
||||
export type SearchOptions<S extends Schema> = {
|
||||
pagination?: PaginationOption;
|
||||
highlights?: {
|
||||
field: HighlightAbleField<S>;
|
||||
before: string;
|
||||
end: string;
|
||||
}[];
|
||||
fields?: (keyof S)[];
|
||||
};
|
||||
|
||||
export type SearchResult<S extends Schema, O extends SearchOptions<S>> = {
|
||||
pagination: ResultPagination;
|
||||
nodes: ({
|
||||
id: string;
|
||||
score: number;
|
||||
} & (O['fields'] extends any[]
|
||||
? { fields: { [key in O['fields'][number]]: string | string[] } }
|
||||
: unknown) &
|
||||
(O['highlights'] extends any[]
|
||||
? { highlights: { [key in O['highlights'][number]['field']]: string[] } }
|
||||
: unknown))[];
|
||||
};
|
||||
|
||||
export interface AggregateOptions<S extends Schema> {
|
||||
pagination?: PaginationOption;
|
||||
hits?: SearchOptions<S>;
|
||||
}
|
||||
|
||||
export type AggregateResult<S extends Schema, O extends AggregateOptions<S>> = {
|
||||
pagination: ResultPagination;
|
||||
buckets: ({
|
||||
key: string;
|
||||
score: number;
|
||||
count: number;
|
||||
} & (O['hits'] extends object
|
||||
? { hits: SearchResult<S, O['hits']> }
|
||||
: unknown))[];
|
||||
};
|
||||
@@ -1,47 +0,0 @@
# job

Job system abstraction for AFFiNE. Currently, only `IndexedDBJobQueue` is implemented; more backends will be implemented in the future.

It runs background jobs in the browser and in distributed environments; multiple `runners` can consume tasks simultaneously without any additional coordination between them.
## Basic Usage

```ts
const queue = new IndexedDBJobQueue('my-queue');

await queue.enqueue([
  {
    batchKey: '1',
    payload: { a: 'hello' },
  },
  {
    batchKey: '2',
    payload: { a: 'world' },
  },
]);

const runner = new JobRunner(queue, async jobs => {
  console.log(jobs);
});

runner.start();

// Output:
// [ { batchKey: '1', payload: { a: 'hello' } } ]
// [ { batchKey: '2', payload: { a: 'world' } } ]
```

## `batchKey`

Each job has a `batchKey`; jobs that share a `batchKey` are handed to a single `runner` and executed as one batch.
Additionally, while jobs with a given `batchKey` are in progress, other `runners` will not accept jobs with that `batchKey`, which provides exclusive locking on the underlying resource.

> In the future, `batchKey` will also be used to implement priorities.
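
For example, a rough sketch of how batching plays out with the queue and runner shown above (the queue name, batch keys, and payload shape are made-up values for illustration):

```ts
const queue = new IndexedDBJobQueue('indexing');

// Two jobs share the batchKey 'doc-1', one uses 'doc-2'.
await queue.enqueue([
  { batchKey: 'doc-1', payload: { block: 'a' } },
  { batchKey: 'doc-1', payload: { block: 'b' } },
  { batchKey: 'doc-2', payload: { block: 'c' } },
]);

const runner = new JobRunner(queue, async jobs => {
  // Both 'doc-1' jobs arrive together in a single invocation;
  // the 'doc-2' job is delivered separately.
  for (const job of jobs) {
    console.log(job.batchKey, job.payload);
  }
});

runner.start();
```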

## `timeout`

If the job execution time exceeds 30 seconds, it will be considered a timeout and reassigned to another `runner`.
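
A practical consequence is that each job should comfortably finish within that window, and job handlers should be idempotent, since a timed-out job becomes acceptable to other `runners` again and may run more than once. Work that could take longer is better split into several smaller jobs up front; a sketch, where `docIds` is a hypothetical list of documents to index:

```ts
// Instead of a single job that indexes a whole workspace (and may time out),
// enqueue one job per document so each unit finishes well under 30 seconds.
await queue.enqueue(
  docIds.map(docId => ({
    batchKey: docId,
    payload: { docId },
  }))
);
```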

## Error Handling

If an error is thrown during job execution, the error is logged, but the job is still considered complete.
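
Retries are therefore up to the worker: to repeat a failed attempt, catch the error and enqueue a fresh copy of the job yourself. A minimal sketch, where `doWork` is a placeholder for the actual job logic:

```ts
const runner = new JobRunner(queue, async jobs => {
  for (const job of jobs) {
    try {
      await doWork(job.payload);
    } catch (err) {
      // The failed job is still marked complete by the runner,
      // so re-enqueue the same payload to get another attempt.
      console.error('job failed, re-enqueueing', err);
      await queue.enqueue([{ batchKey: job.batchKey, payload: job.payload }]);
    }
  }
});
```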
@@ -1,231 +0,0 @@
|
||||
/**
|
||||
* @vitest-environment happy-dom
|
||||
*/
|
||||
import 'fake-indexeddb/auto';
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, test, vitest } from 'vitest';
|
||||
|
||||
import { IndexedDBJobQueue } from '../impl/indexeddb';
|
||||
import type { JobQueue } from '../queue';
|
||||
|
||||
let queue: JobQueue<{
|
||||
a: string;
|
||||
}> = null!;
|
||||
|
||||
describe.each([{ name: 'idb', backend: IndexedDBJobQueue }])(
|
||||
'impl tests($name)',
|
||||
({ backend }) => {
|
||||
beforeEach(async () => {
|
||||
queue = new backend();
|
||||
|
||||
await queue.clear();
|
||||
|
||||
vitest.useFakeTimers({
|
||||
toFake: ['Date'],
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vitest.useRealTimers();
|
||||
});
|
||||
|
||||
test('basic', async () => {
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
{
|
||||
batchKey: '2',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
]);
|
||||
const job1 = await queue.accept();
|
||||
const job2 = await queue.accept();
|
||||
|
||||
expect([job1!, job2!]).toEqual([
|
||||
[
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
],
|
||||
[
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '2',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
],
|
||||
]);
|
||||
|
||||
const job3 = await queue.accept();
|
||||
expect(job3).toBeNull();
|
||||
|
||||
await queue.return(job1!);
|
||||
await queue.return(job2!);
|
||||
});
|
||||
|
||||
test('batch', async () => {
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
]);
|
||||
const job1 = await queue.accept();
|
||||
|
||||
expect(job1).toEqual(
|
||||
expect.arrayContaining([
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
])
|
||||
);
|
||||
});
|
||||
|
||||
test('timeout', async () => {
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
{
|
||||
const job = await queue.accept();
|
||||
|
||||
expect(job).toEqual([
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
{
|
||||
const job = await queue.accept();
|
||||
|
||||
expect(job).toBeNull();
|
||||
}
|
||||
|
||||
vitest.advanceTimersByTime(1000 * 60 * 60);
|
||||
|
||||
{
|
||||
const job = await queue.accept();
|
||||
|
||||
expect(job).toEqual([
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
}
|
||||
});
|
||||
|
||||
test('waitForAccept', async () => {
|
||||
const abort = new AbortController();
|
||||
|
||||
let result = null as any;
|
||||
queue.waitForAccept(abort.signal).then(jobs => (result = jobs));
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
|
||||
expect(result).toBeNull();
|
||||
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
|
||||
await vitest.waitFor(() => {
|
||||
expect(result).toEqual([
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
test('waitForAccept race', async () => {
|
||||
const abort = new AbortController();
|
||||
|
||||
let result1 = null as any;
|
||||
let result2 = null as any;
|
||||
queue.waitForAccept(abort.signal).then(jobs => (result1 = jobs));
|
||||
queue.waitForAccept(abort.signal).then(jobs => (result2 = jobs));
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
|
||||
expect(result1).toBeNull();
|
||||
expect(result2).toBeNull();
|
||||
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
]);
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
|
||||
expect([result1, result2]).toEqual(
|
||||
expect.arrayContaining([
|
||||
[
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
],
|
||||
null,
|
||||
])
|
||||
);
|
||||
|
||||
await queue.enqueue([
|
||||
{
|
||||
batchKey: '2',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
]);
|
||||
|
||||
await vitest.waitFor(() => {
|
||||
expect([result1, result2]).toEqual(
|
||||
expect.arrayContaining([
|
||||
[
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '1',
|
||||
payload: { a: 'hello' },
|
||||
},
|
||||
],
|
||||
[
|
||||
{
|
||||
id: expect.any(String),
|
||||
batchKey: '2',
|
||||
payload: { a: 'world' },
|
||||
},
|
||||
],
|
||||
])
|
||||
);
|
||||
});
|
||||
});
|
||||
}
|
||||
);
|
||||
@@ -1,257 +0,0 @@
|
||||
import type { DBSchema, IDBPDatabase } from 'idb';
|
||||
import { openDB } from 'idb';
|
||||
import { merge, Observable, of, throttleTime } from 'rxjs';
|
||||
|
||||
import { fromPromise } from '../../../../livedata';
|
||||
import { throwIfAborted } from '../../../../utils';
|
||||
import { exhaustMapWithTrailing } from '../../../../utils/';
|
||||
import type { Job, JobParams, JobQueue } from '../../';
|
||||
|
||||
interface IndexDB extends DBSchema {
|
||||
jobs: {
|
||||
key: number;
|
||||
value: JobRecord;
|
||||
indexes: {
|
||||
batchKey: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
interface JobRecord {
|
||||
batchKey: string;
|
||||
startTime: number | null;
|
||||
payload: any;
|
||||
}
|
||||
|
||||
export class IndexedDBJobQueue<J> implements JobQueue<J> {
|
||||
database: IDBPDatabase<IndexDB> = null as any;
|
||||
broadcast = new BroadcastChannel('idb-job-queue:' + this.databaseName);
|
||||
|
||||
constructor(private readonly databaseName: string = 'jobs') {}
|
||||
|
||||
async enqueue(jobs: JobParams[]): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite');
|
||||
|
||||
for (const job of jobs) {
|
||||
await trx.objectStore('jobs').add({
|
||||
batchKey: job.batchKey,
|
||||
payload: job.payload,
|
||||
startTime: null,
|
||||
});
|
||||
}
|
||||
|
||||
trx.commit();
|
||||
|
||||
// send broadcast to notify new jobs
|
||||
this.broadcast.postMessage('new-jobs');
|
||||
}
|
||||
|
||||
async accept(): Promise<Job[] | null> {
|
||||
await this.ensureInitialized();
|
||||
const jobs = [];
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
// if no priority jobs
|
||||
|
||||
if (jobs.length === 0) {
|
||||
const batchKeys = trx.objectStore('jobs').index('batchKey').iterate();
|
||||
|
||||
let currentBatchKey: string = null as any;
|
||||
let currentBatchJobs = [];
|
||||
let skipCurrentBatch = false;
|
||||
|
||||
for await (const item of batchKeys) {
|
||||
if (item.value.batchKey !== currentBatchKey) {
|
||||
if (!skipCurrentBatch && currentBatchJobs.length > 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
currentBatchKey = item.value.batchKey;
|
||||
currentBatchJobs = [];
|
||||
skipCurrentBatch = false;
|
||||
}
|
||||
if (skipCurrentBatch) {
|
||||
continue;
|
||||
}
|
||||
if (this.isAcceptable(item.value)) {
|
||||
currentBatchJobs.push({
|
||||
id: item.primaryKey,
|
||||
job: item.value,
|
||||
});
|
||||
} else {
|
||||
skipCurrentBatch = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (skipCurrentBatch === false && currentBatchJobs.length > 0) {
|
||||
jobs.push(...currentBatchJobs);
|
||||
}
|
||||
}
|
||||
|
||||
for (const { id, job } of jobs) {
|
||||
const startTime = Date.now();
|
||||
await trx.objectStore('jobs').put({ ...job, startTime }, id);
|
||||
}
|
||||
|
||||
if (jobs.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return jobs.map(({ id, job }) => ({
|
||||
id: id.toString(),
|
||||
batchKey: job.batchKey,
|
||||
payload: job.payload,
|
||||
}));
|
||||
}
|
||||
|
||||
async waitForAccept(signal: AbortSignal): Promise<Job<J>[]> {
|
||||
const broadcast = new BroadcastChannel(
|
||||
'idb-job-queue:' + this.databaseName
|
||||
);
|
||||
|
||||
try {
|
||||
let deferred = Promise.withResolvers<void>();
|
||||
|
||||
broadcast.onmessage = () => {
|
||||
deferred.resolve();
|
||||
};
|
||||
|
||||
while (throwIfAborted(signal)) {
|
||||
const jobs = await this.accept();
|
||||
if (jobs !== null) {
|
||||
return jobs;
|
||||
}
|
||||
|
||||
await Promise.race([
|
||||
deferred.promise,
|
||||
new Promise(resolve => {
|
||||
setTimeout(resolve, 5000);
|
||||
}),
|
||||
new Promise((_, reject) => {
|
||||
// exit if manually stopped
|
||||
if (signal?.aborted) {
|
||||
reject(signal.reason);
|
||||
}
|
||||
signal?.addEventListener('abort', () => {
|
||||
reject(signal.reason);
|
||||
});
|
||||
}),
|
||||
]);
|
||||
deferred = Promise.withResolvers<void>();
|
||||
}
|
||||
return [];
|
||||
} finally {
|
||||
broadcast.close();
|
||||
}
|
||||
}
|
||||
|
||||
async complete(jobs: Job[]): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
for (const { id } of jobs) {
|
||||
await trx
|
||||
.objectStore('jobs')
|
||||
.delete(typeof id === 'string' ? parseInt(id) : id);
|
||||
}
|
||||
|
||||
trx.commit();
|
||||
this.broadcast.postMessage('job-completed');
|
||||
}
|
||||
|
||||
async return(jobs: Job[], retry: boolean = false): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
|
||||
for (const { id } of jobs) {
|
||||
if (retry) {
|
||||
const nid = typeof id === 'string' ? parseInt(id) : id;
|
||||
const job = await trx.objectStore('jobs').get(nid);
|
||||
if (job) {
|
||||
await trx.objectStore('jobs').put({ ...job, startTime: null }, nid);
|
||||
}
|
||||
} else {
|
||||
await trx
|
||||
.objectStore('jobs')
|
||||
.delete(typeof id === 'string' ? parseInt(id) : id);
|
||||
}
|
||||
}
|
||||
|
||||
trx.commit();
|
||||
|
||||
this.broadcast.postMessage('job-completed');
|
||||
}
|
||||
|
||||
async clear(): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readwrite', {
|
||||
durability: 'relaxed',
|
||||
});
|
||||
await trx.objectStore('jobs').clear();
|
||||
}
|
||||
|
||||
private async ensureInitialized(): Promise<void> {
|
||||
if (!this.database) {
|
||||
await this.initialize();
|
||||
}
|
||||
}
|
||||
|
||||
private async initialize(): Promise<void> {
|
||||
if (this.database) {
|
||||
return;
|
||||
}
|
||||
this.database = await openDB(this.databaseName, 1, {
|
||||
upgrade(database) {
|
||||
const jobs = database.createObjectStore('jobs', {
|
||||
autoIncrement: true,
|
||||
});
|
||||
jobs.createIndex('batchKey', 'batchKey');
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
TIMEOUT = 1000 * 30 /* 30 seconds */;
|
||||
|
||||
private isTimeout(job: JobRecord) {
|
||||
return job.startTime !== null && job.startTime + this.TIMEOUT < Date.now();
|
||||
}
|
||||
|
||||
private isAcceptable(job: JobRecord) {
|
||||
return job.startTime === null || this.isTimeout(job);
|
||||
}
|
||||
|
||||
get status$() {
|
||||
return merge(
|
||||
of(1),
|
||||
new Observable(subscriber => {
|
||||
const broadcast = new BroadcastChannel(
|
||||
'idb-job-queue:' + this.databaseName
|
||||
);
|
||||
|
||||
broadcast.onmessage = () => {
|
||||
subscriber.next(1);
|
||||
};
|
||||
return () => {
|
||||
broadcast.close();
|
||||
};
|
||||
})
|
||||
).pipe(
|
||||
throttleTime(300, undefined, { leading: true, trailing: true }),
|
||||
exhaustMapWithTrailing(() =>
|
||||
fromPromise(async () => {
|
||||
await this.ensureInitialized();
|
||||
const trx = this.database.transaction(['jobs'], 'readonly');
|
||||
const remaining = await trx.objectStore('jobs').count();
|
||||
return { remaining };
|
||||
})
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
export * from './queue';
export * from './runner';
@@ -1,28 +0,0 @@
import type { Observable } from 'rxjs';

export interface JobParams<Payload = any> {
  batchKey: string;
  payload: Payload;
}

export interface Job<Payload = any> extends JobParams<Payload> {
  id: string;
}

export interface JobQueueStatus {
  remaining: number;
}

export interface JobQueue<Payload> {
  enqueue(jobs: JobParams<Payload>[]): Promise<void>;

  accept(): Promise<Job<Payload>[] | null>;

  waitForAccept(signal: AbortSignal): Promise<Job<Payload>[]>;

  return(jobs: Job<Payload>[], retry?: boolean): Promise<void>;

  clear(): Promise<void>;

  status$: Observable<JobQueueStatus>;
}
@@ -1,63 +0,0 @@
|
||||
import { DebugLogger } from '@affine/debug';
|
||||
|
||||
import { MANUALLY_STOP, throwIfAborted } from '../../utils';
|
||||
import type { Job, JobQueue } from './queue';
|
||||
|
||||
const logger = new DebugLogger('job-runner');
|
||||
|
||||
export class JobRunner<J> {
|
||||
abort: AbortController | null = null;
|
||||
|
||||
constructor(
|
||||
private readonly queue: JobQueue<J>,
|
||||
private readonly worker: (
|
||||
jobs: Job<J>[],
|
||||
signal: AbortSignal
|
||||
) => Promise<void>,
|
||||
private readonly interval: () => Promise<void> = async () => {}
|
||||
) {}
|
||||
|
||||
start() {
|
||||
this.stop();
|
||||
this.abort = new AbortController();
|
||||
this.loop(this.abort.signal).catch(err => {
|
||||
if (err === MANUALLY_STOP) {
|
||||
return;
|
||||
}
|
||||
logger.error(err);
|
||||
});
|
||||
}
|
||||
|
||||
stop() {
|
||||
this.abort?.abort(MANUALLY_STOP);
|
||||
this.abort = null;
|
||||
}
|
||||
|
||||
async loop(signal: AbortSignal) {
|
||||
while (throwIfAborted(signal)) {
|
||||
const jobs = await this.queue.waitForAccept(signal);
|
||||
|
||||
if (jobs !== null) {
|
||||
try {
|
||||
await this.worker(jobs, signal);
|
||||
await this.queue.return(jobs);
|
||||
} catch (err) {
|
||||
if (err === MANUALLY_STOP) {
|
||||
await this.queue.return(jobs, true);
|
||||
} else {
|
||||
// TODO: retry logic
|
||||
await this.queue.return(jobs);
|
||||
}
|
||||
logger.error(
|
||||
'Error processing jobs',
|
||||
err instanceof Error ? (err.stack ?? err.message) : err
|
||||
);
|
||||
}
|
||||
} else {
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
await this.interval();
|
||||
}
|
||||
}
|
||||
}