From 8957d0645f3d04124ffd6405f8ec7405235ac64f Mon Sep 17 00:00:00 2001 From: EYHN Date: Mon, 31 Mar 2025 12:59:51 +0000 Subject: [PATCH] feat(nbstore): add indexer storage (#10953) --- packages/common/infra/src/index.ts | 1 - packages/common/infra/src/sync/index.ts | 8 - .../common/infra/src/sync/indexer/README.md | 147 --- .../sync/indexer/__tests__/black-box.spec.ts | 560 --------- .../infra/src/sync/indexer/field-type.ts | 1 - .../indexer/impl/indexeddb/data-struct.ts | 551 --------- .../src/sync/indexer/impl/indexeddb/index.ts | 182 --- .../sync/indexer/impl/memory/data-struct.ts | 290 ----- .../src/sync/indexer/impl/memory/index.ts | 141 --- .../indexer/impl/memory/inverted-index.ts | 220 ---- .../src/sync/indexer/impl/memory/match.ts | 108 -- .../common/infra/src/sync/indexer/index.ts | 6 - .../common/infra/src/sync/indexer/indexer.ts | 41 - .../common/infra/src/sync/indexer/query.ts | 35 - .../common/infra/src/sync/indexer/schema.ts | 25 - .../common/infra/src/sync/indexer/searcher.ts | 83 -- packages/common/infra/src/sync/job/README.md | 47 - .../src/sync/job/__tests__/black-box.spec.ts | 231 ---- .../src/sync/job/impl/indexeddb/index.ts | 257 ---- packages/common/infra/src/sync/job/index.ts | 2 - packages/common/infra/src/sync/job/queue.ts | 28 - packages/common/infra/src/sync/job/runner.ts | 63 - packages/common/nbstore/package.json | 8 +- packages/common/nbstore/src/frontend/index.ts | 1 + .../common/nbstore/src/frontend/indexer.ts | 45 + .../common/nbstore/src/impls/idb/index.ts | 11 + .../nbstore/src/impls/idb/indexer-sync.ts | 38 + .../impls/idb/indexer}/__tests__/bm25.spec.ts | 0 .../indexer}/__tests__/highlighter.spec.ts | 0 .../idb/indexer}/__tests__/tokenizer.spec.ts | 0 .../src/impls/idb/indexer}/bm25.ts | 0 .../src/impls/idb/indexer/data-struct.ts | 493 ++++++++ .../src/impls/idb/indexer}/highlighter.ts | 0 .../nbstore/src/impls/idb/indexer/index.ts | 203 ++++ .../src/impls/idb/indexer}/inverted-index.ts | 103 +- .../src/impls/idb/indexer}/match.ts | 0 .../src/impls/idb/indexer}/tokenizer.ts | 0 .../nbstore/src/impls/idb/indexer/utils.ts | 104 ++ .../common/nbstore/src/impls/idb/schema.ts | 69 +- packages/common/nbstore/src/index.ts | 4 +- packages/common/nbstore/src/storage/doc.ts | 3 +- .../nbstore/src/storage/dummy/indexer-sync.ts | 16 + .../nbstore/src/storage/dummy/indexer.ts | 88 ++ packages/common/nbstore/src/storage/index.ts | 11 +- .../nbstore/src/storage/indexer-sync.ts | 21 + .../common/nbstore/src/storage/indexer.ts | 176 +++ .../src/storage}/indexer/document.ts | 27 +- .../nbstore/src/storage/indexer/field-type.ts | 1 + .../nbstore/src/storage/indexer/query.ts | 35 + .../nbstore/src/storage/indexer/schema.ts | 51 + .../common/nbstore/src/storage/storage.ts | 9 +- packages/common/nbstore/src/sync/blob/peer.ts | 6 +- packages/common/nbstore/src/sync/index.ts | 8 + .../nbstore/src/sync/indexer/crawler.ts | 862 ++++++++++++++ .../common/nbstore/src/sync/indexer/index.ts | 579 +++++++++ .../nbstore/src/utils/priority-queue.ts | 6 +- .../nbstore/src/utils/take-until-abort.ts | 42 + packages/common/nbstore/src/worker/client.ts | 170 ++- .../common/nbstore/src/worker/consumer.ts | 52 +- packages/common/nbstore/src/worker/ops.ts | 43 + packages/common/nbstore/tsconfig.json | 3 +- .../src/background-worker/index.ts | 2 + packages/frontend/apps/ios/src/worker.ts | 2 + .../src/bootstrap/polyfill/set-difference.ts | 1 + .../page-list/page-content-preview.tsx | 7 +- .../components/explorer/nodes/doc/index.tsx | 27 +- .../blob-management/entity/unused-blobs.ts | 19 +- 
.../docs-search/entities/docs-indexer.ts | 301 ----- .../core/src/modules/docs-search/index.ts | 11 +- .../core/src/modules/docs-search/schema.ts | 35 - .../docs-search/services/docs-search.ts | 135 +-- .../modules/docs-search/worker/in-worker.ts | 1037 ----------------- .../modules/docs-search/worker/out-worker.ts | 105 -- .../src/modules/docs-search/worker/types.ts | 53 - .../explorer/views/nodes/doc/index.tsx | 27 +- .../src/modules/quicksearch/impls/docs.ts | 9 +- .../src/modules/search-menu/services/index.ts | 16 +- .../modules/workspace-engine/impls/cloud.ts | 16 + .../modules/workspace-engine/impls/local.ts | 16 + .../src/modules/workspace/entities/engine.ts | 7 + tools/utils/src/workspace.gen.ts | 1 + yarn.lock | 4 + 82 files changed, 3393 insertions(+), 4753 deletions(-) delete mode 100644 packages/common/infra/src/sync/index.ts delete mode 100644 packages/common/infra/src/sync/indexer/README.md delete mode 100644 packages/common/infra/src/sync/indexer/__tests__/black-box.spec.ts delete mode 100644 packages/common/infra/src/sync/indexer/field-type.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/indexeddb/index.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/memory/index.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/memory/inverted-index.ts delete mode 100644 packages/common/infra/src/sync/indexer/impl/memory/match.ts delete mode 100644 packages/common/infra/src/sync/indexer/index.ts delete mode 100644 packages/common/infra/src/sync/indexer/indexer.ts delete mode 100644 packages/common/infra/src/sync/indexer/query.ts delete mode 100644 packages/common/infra/src/sync/indexer/schema.ts delete mode 100644 packages/common/infra/src/sync/indexer/searcher.ts delete mode 100644 packages/common/infra/src/sync/job/README.md delete mode 100644 packages/common/infra/src/sync/job/__tests__/black-box.spec.ts delete mode 100644 packages/common/infra/src/sync/job/impl/indexeddb/index.ts delete mode 100644 packages/common/infra/src/sync/job/index.ts delete mode 100644 packages/common/infra/src/sync/job/queue.ts delete mode 100644 packages/common/infra/src/sync/job/runner.ts create mode 100644 packages/common/nbstore/src/frontend/indexer.ts create mode 100644 packages/common/nbstore/src/impls/idb/indexer-sync.ts rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/__tests__/bm25.spec.ts (100%) rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/__tests__/highlighter.spec.ts (100%) rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/__tests__/tokenizer.spec.ts (100%) rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/bm25.ts (100%) create mode 100644 packages/common/nbstore/src/impls/idb/indexer/data-struct.ts rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/highlighter.ts (100%) create mode 100644 packages/common/nbstore/src/impls/idb/indexer/index.ts rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/inverted-index.ts (83%) rename packages/common/{infra/src/sync/indexer/impl/indexeddb => nbstore/src/impls/idb/indexer}/match.ts (100%) rename packages/common/{infra/src/sync/indexer/impl/indexeddb => 
nbstore/src/impls/idb/indexer}/tokenizer.ts (100%)
 create mode 100644 packages/common/nbstore/src/impls/idb/indexer/utils.ts
 create mode 100644 packages/common/nbstore/src/storage/dummy/indexer-sync.ts
 create mode 100644 packages/common/nbstore/src/storage/dummy/indexer.ts
 create mode 100644 packages/common/nbstore/src/storage/indexer-sync.ts
 create mode 100644 packages/common/nbstore/src/storage/indexer.ts
 rename packages/common/{infra/src/sync => nbstore/src/storage}/indexer/document.ts (55%)
 create mode 100644 packages/common/nbstore/src/storage/indexer/field-type.ts
 create mode 100644 packages/common/nbstore/src/storage/indexer/query.ts
 create mode 100644 packages/common/nbstore/src/storage/indexer/schema.ts
 create mode 100644 packages/common/nbstore/src/sync/indexer/crawler.ts
 create mode 100644 packages/common/nbstore/src/sync/indexer/index.ts
 create mode 100644 packages/common/nbstore/src/utils/take-until-abort.ts
 create mode 100644 packages/frontend/core/src/bootstrap/polyfill/set-difference.ts
 delete mode 100644 packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts
 delete mode 100644 packages/frontend/core/src/modules/docs-search/schema.ts
 delete mode 100644 packages/frontend/core/src/modules/docs-search/worker/in-worker.ts
 delete mode 100644 packages/frontend/core/src/modules/docs-search/worker/out-worker.ts
 delete mode 100644 packages/frontend/core/src/modules/docs-search/worker/types.ts

diff --git a/packages/common/infra/src/index.ts b/packages/common/infra/src/index.ts
index 993b105c4a..8f959bd7cc 100644
--- a/packages/common/infra/src/index.ts
+++ b/packages/common/infra/src/index.ts
@@ -5,5 +5,4 @@ export * from './livedata';
 export * from './media';
 export * from './orm';
 export * from './storage';
-export * from './sync';
 export * from './utils';
diff --git a/packages/common/infra/src/sync/index.ts b/packages/common/infra/src/sync/index.ts
deleted file mode 100644
index bf2553b3d2..0000000000
--- a/packages/common/infra/src/sync/index.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-export * from './indexer';
-export {
-  IndexedDBIndex,
-  IndexedDBIndexStorage,
-} from './indexer/impl/indexeddb';
-export { MemoryIndex, MemoryIndexStorage } from './indexer/impl/memory';
-export * from './job';
-export { IndexedDBJobQueue } from './job/impl/indexeddb';
diff --git a/packages/common/infra/src/sync/indexer/README.md b/packages/common/infra/src/sync/indexer/README.md
deleted file mode 100644
index dc258fac7b..0000000000
--- a/packages/common/infra/src/sync/indexer/README.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# index
-
-Search engine abstraction layer for AFFiNE.
-
-## Usage
-
-1. Define schema
-
-First, define the shape of the data. The following data types are currently supported.
-
-- 'Integer'
-- 'Boolean'
-- 'FullText': for full-text search; values are tokenized and stemmed.
-- 'String': for exact-match search, e.g. tags, ids.
-
-```typescript
-const schema = defineSchema({
-  title: 'FullText',
-  tag: 'String',
-  size: 'Integer',
-});
-```
-
-> **Array type**
-> All types can contain one or more values, so each field can store an array.
->
-> This design conforms to the usual conventions of search engine APIs, such as in Elasticsearch: https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html
-
-2. Pick a backend
-
-Currently, there are two backends available.
-
-- `MemoryIndex`: in-memory indexer, useful for testing.
-- `IndexedDBIndex`: persistent indexer using IndexedDB.
-
-> **Underlying Data Table**
-> Some backends need to maintain underlying data tables, including table creation and migration. This work is executed silently the first time the indexer is invoked.
-> Callers do not need to worry about these details.
-
-3. Write data
-
-Write data to the indexer. You need to start a write transaction with `await index.write()` and then complete the batch write with `await writer.commit()`.
-
-> **Transactional**
-> Typically, the indexer does not provide transactional guarantees; reliable locking logic needs to be implemented at a higher level.
-
-```typescript
-const index = new IndexedDBIndex(schema);
-
-const writer = await index.write();
-writer.insert(
-  Document.from('id', {
-    title: 'hello world',
-    tag: ['doc', 'page'],
-    size: '100',
-  })
-);
-await writer.commit();
-```
-
-4. Search data
-
-To search for content in the indexer, you need to use a specific **query language**. Here are some examples:
-
-```typescript
-// match title == 'hello world'
-{
-  type: 'match',
-  field: 'title',
-  match: 'hello world',
-}
-
-// match title == 'hello world' && tag == 'doc'
-{
-  type: 'boolean',
-  occur: 'must',
-  queries: [
-    {
-      type: 'match',
-      field: 'title',
-      match: 'hello world',
-    },
-    {
-      type: 'match',
-      field: 'tag',
-      match: 'doc',
-    },
-  ],
-}
-```
-
-There are two ways to perform a search: `index.search()` and `index.aggregate()`.
-
-- **search**: returns each matched node and pagination information.
-- **aggregate**: aggregates all matched results into buckets based on a given field, and returns the count and score of the items in each bucket.
-
-Examples:
-
-```typescript
-const result = await index.search({
-  type: 'match',
-  field: 'title',
-  match: 'hello world',
-});
-// result = {
-//   nodes: [
-//     {
-//       id: '1',
-//       score: 1,
-//     },
-//   ],
-//   pagination: {
-//     count: 1,
-//     hasMore: false,
-//     limit: 10,
-//     skip: 0,
-//   },
-// }
-```
-
-```typescript
-const result = await index.aggregate(
-  {
-    type: 'match',
-    field: 'title',
-    match: 'affine',
-  },
-  'tag'
-);
-// result = {
-//   buckets: [
-//     { key: 'motorcycle', count: 2, score: 1 },
-//     { key: 'bike', count: 1, score: 1 },
-//     { key: 'airplane', count: 1, score: 1 },
-//   ],
-//   pagination: {
-//     count: 3,
-//     hasMore: false,
-//     limit: 10,
-//     skip: 0,
-//   },
-// }
-```
-
-More uses:
-
-[black-box.spec.ts](./__tests__/black-box.spec.ts)
diff --git a/packages/common/infra/src/sync/indexer/__tests__/black-box.spec.ts b/packages/common/infra/src/sync/indexer/__tests__/black-box.spec.ts
deleted file mode 100644
index 0f48aa04b7..0000000000
--- a/packages/common/infra/src/sync/indexer/__tests__/black-box.spec.ts
+++ /dev/null
@@ -1,560 +0,0 @@
-/** - * @vitest-environment happy-dom - */ -import 'fake-indexeddb/auto'; - -import { map } from 'rxjs'; -import { beforeEach, describe, expect, test, vitest } from 'vitest'; - -import { defineSchema, Document, type Index } from '..'; -import { IndexedDBIndex } from '../impl/indexeddb'; -import { MemoryIndex } from '../impl/memory'; - -const schema = defineSchema({ - title: 'FullText', - tag: 'String', - size: 'Integer', -}); - -let index: Index = null!; - -describe.each([ - { name: 'memory', backend: MemoryIndex }, - { name: 'idb', backend: IndexedDBIndex }, -])('index tests($name)', ({ backend }) => { - async function writeData( - data: Record< - string, - Partial> - > - ) { - const writer = await index.write(); - for (const [id, item] of Object.entries(data)) { - const doc = new Document(id); - for (const [key, value] of Object.entries(item)) { - if
(Array.isArray(value)) { - for (const v of value) { - doc.insert(key, v); - } - } else { - doc.insert(key, value); - } - } - writer.insert(doc); - } - await writer.commit(); - } - - beforeEach(async () => { - index = new backend(schema); - await index.clear(); - }); - - test('basic', async () => { - await writeData({ - '1': { - title: 'hello world', - }, - }); - - const result = await index.search({ - type: 'match', - field: 'title', - match: 'hello world', - }); - - expect(result).toEqual({ - nodes: [ - { - id: '1', - score: expect.anything(), - }, - ], - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('basic integer', async () => { - await writeData({ - '1': { - title: 'hello world', - size: '100', - }, - }); - - const result = await index.search({ - type: 'match', - field: 'size', - match: '100', - }); - - expect(result).toEqual({ - nodes: [ - { - id: '1', - score: expect.anything(), - }, - ], - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('fuzz', async () => { - await writeData({ - '1': { - title: 'hello world', - }, - }); - const result = await index.search({ - type: 'match', - field: 'title', - match: 'hell', - }); - - expect(result).toEqual({ - nodes: [ - { - id: '1', - score: expect.anything(), - }, - ], - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('highlight', async () => { - await writeData({ - '1': { - title: 'hello world', - size: '100', - }, - }); - - const result = await index.search( - { - type: 'match', - field: 'title', - match: 'hello', - }, - { - highlights: [ - { - field: 'title', - before: '', - end: '', - }, - ], - } - ); - - expect(result).toEqual({ - nodes: expect.arrayContaining([ - { - id: '1', - score: expect.anything(), - highlights: { - title: [expect.stringContaining('hello')], - }, - }, - ]), - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('fields', async () => { - await writeData({ - '1': { - title: 'hello world', - tag: ['car', 'bike'], - }, - }); - - const result = await index.search( - { - type: 'match', - field: 'title', - match: 'hello', - }, - { - fields: ['title', 'tag'], - } - ); - - expect(result.nodes[0].fields).toEqual({ - title: 'hello world', - tag: expect.arrayContaining(['bike', 'car']), - }); - }); - - test('pagination', async () => { - await writeData( - Array.from({ length: 100 }).reduce((acc: any, _, i) => { - acc['apple' + i] = { - tag: ['apple'], - }; - return acc; - }, {}) as any - ); - - const result = await index.search( - { - type: 'match', - field: 'tag', - match: 'apple', - }, - { - pagination: { - skip: 0, - limit: 10, - }, - } - ); - - expect(result).toEqual({ - nodes: expect.arrayContaining( - Array.from({ length: 10 }).fill({ - id: expect.stringContaining('apple'), - score: expect.anything(), - }) - ), - pagination: { - count: 100, - hasMore: true, - limit: 10, - skip: 0, - }, - }); - - const result2 = await index.search( - { - type: 'match', - field: 'tag', - match: 'apple', - }, - { - pagination: { - skip: 10, - limit: 10, - }, - } - ); - - expect(result2).toEqual({ - nodes: expect.arrayContaining( - Array.from({ length: 10 }).fill({ - id: expect.stringContaining('apple'), - score: expect.anything(), - }) - ), - pagination: { - count: 100, - hasMore: true, - limit: 10, - skip: 10, - }, - }); - }); - - test('aggr', async () => { - await writeData({ - '1': { - title: 'hello 
world', - tag: ['car', 'bike'], - }, - affine1: { - title: 'affine', - tag: ['motorcycle', 'bike'], - }, - affine2: { - title: 'affine', - tag: ['motorcycle', 'airplane'], - }, - }); - - const result = await index.aggregate( - { - type: 'match', - field: 'title', - match: 'affine', - }, - 'tag' - ); - - expect(result).toEqual({ - buckets: expect.arrayContaining([ - { key: 'motorcycle', count: 2, score: expect.anything() }, - { key: 'bike', count: 1, score: expect.anything() }, - { key: 'airplane', count: 1, score: expect.anything() }, - ]), - pagination: { - count: 3, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('hits', async () => { - await writeData( - Array.from({ length: 100 }).reduce((acc: any, _, i) => { - acc['apple' + i] = { - title: 'apple', - tag: ['apple', 'fruit'], - }; - return acc; - }, {}) as any - ); - const result = await index.aggregate( - { - type: 'match', - field: 'title', - match: 'apple', - }, - 'tag', - { - hits: { - pagination: { - skip: 0, - limit: 5, - }, - highlights: [ - { - field: 'title', - before: '', - end: '', - }, - ], - fields: ['title', 'tag'], - }, - } - ); - - expect(result).toEqual({ - buckets: expect.arrayContaining([ - { - key: 'apple', - count: 100, - score: expect.anything(), - hits: { - pagination: { - count: 100, - hasMore: true, - limit: 5, - skip: 0, - }, - nodes: expect.arrayContaining( - Array.from({ length: 5 }).fill({ - id: expect.stringContaining('apple'), - score: expect.anything(), - highlights: { - title: [expect.stringContaining('apple')], - }, - fields: { - title: expect.stringContaining('apple'), - tag: expect.arrayContaining(['apple', 'fruit']), - }, - }) - ), - }, - }, - { - key: 'fruit', - count: 100, - score: expect.anything(), - hits: { - pagination: { - count: 100, - hasMore: true, - limit: 5, - skip: 0, - }, - nodes: expect.arrayContaining( - Array.from({ length: 5 }).fill({ - id: expect.stringContaining('apple'), - score: expect.anything(), - highlights: { - title: [expect.stringContaining('apple')], - }, - fields: { - title: expect.stringContaining('apple'), - tag: expect.arrayContaining(['apple', 'fruit']), - }, - }) - ), - }, - }, - ]), - pagination: { - count: 2, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test('exists', async () => { - await writeData({ - '1': { - title: 'hello world', - tag: '111', - }, - '2': { - tag: '222', - }, - '3': { - title: 'hello world', - tag: '333', - }, - }); - - const result = await index.search({ - type: 'exists', - field: 'title', - }); - - expect(result).toEqual({ - nodes: expect.arrayContaining([ - { - id: '1', - score: expect.anything(), - }, - { - id: '3', - score: expect.anything(), - }, - ]), - pagination: { - count: 2, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }); - - test( - 'subscribe', - { - timeout: 30000, - }, - async () => { - await writeData({ - '1': { - title: 'hello world', - }, - }); - - let value = null as any; - index - .search$({ - type: 'match', - field: 'title', - match: 'hello world', - }) - .pipe(map(v => (value = v))) - .subscribe(); - - await vitest.waitFor( - () => { - expect(value).toEqual({ - nodes: [ - { - id: '1', - score: expect.anything(), - }, - ], - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }, - { - timeout: 10000, - } - ); - - await writeData({ - '2': { - title: 'hello world', - }, - }); - - await vitest.waitFor( - () => { - expect(value).toEqual({ - nodes: [ - { - id: '1', - score: 
expect.anything(), - }, - { - id: '2', - score: expect.anything(), - }, - ], - pagination: { - count: 2, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }, - { - timeout: 10000, - } - ); - - const writer = await index.write(); - writer.delete('1'); - await writer.commit(); - - await vitest.waitFor( - () => { - expect(value).toEqual({ - nodes: [ - { - id: '2', - score: expect.anything(), - }, - ], - pagination: { - count: 1, - hasMore: false, - limit: expect.anything(), - skip: 0, - }, - }); - }, - { - timeout: 10000, - } - ); - } - ); -}); diff --git a/packages/common/infra/src/sync/indexer/field-type.ts b/packages/common/infra/src/sync/indexer/field-type.ts deleted file mode 100644 index 2953440cdf..0000000000 --- a/packages/common/infra/src/sync/indexer/field-type.ts +++ /dev/null @@ -1 +0,0 @@ -export type FieldType = 'Integer' | 'FullText' | 'String' | 'Boolean'; diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts b/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts deleted file mode 100644 index 49eef7af4d..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts +++ /dev/null @@ -1,551 +0,0 @@ -import { DebugLogger } from '@affine/debug'; -import { - type DBSchema, - type IDBPDatabase, - type IDBPTransaction, - openDB, - type StoreNames, -} from 'idb'; - -import { - type AggregateOptions, - type AggregateResult, - Document, - type Query, - type Schema, - type SearchOptions, - type SearchResult, -} from '../../'; -import { highlighter } from './highlighter'; -import { - BooleanInvertedIndex, - FullTextInvertedIndex, - IntegerInvertedIndex, - type InvertedIndex, - StringInvertedIndex, -} from './inverted-index'; -import { Match } from './match'; - -const logger = new DebugLogger('indexeddb'); - -export interface IndexDB extends DBSchema { - kvMetadata: { - key: string; - value: { - key: string; - value: any; - }; - }; - records: { - key: number; - value: { - id: string; - data: Map; - }; - indexes: { id: string }; - }; - invertedIndex: { - key: number; - value: { - nid: number; - pos?: { - i: number /* index */; - l: number /* length */; - rs: [number, number][] /* ranges: [start, end] */; - }; - key: ArrayBuffer; - }; - indexes: { key: ArrayBuffer; nid: number }; - }; -} - -export type DataStructRWTransaction = IDBPTransaction< - IndexDB, - ArrayLike>, - 'readwrite' ->; - -export type DataStructROTransaction = IDBPTransaction< - IndexDB, - ArrayLike>, - 'readonly' | 'readwrite' ->; - -export class DataStruct { - private initializePromise: Promise | null = null; - database: IDBPDatabase = null as any; - invertedIndex = new Map(); - - constructor( - readonly databaseName: string, - readonly schema: Schema - ) { - for (const [key, type] of Object.entries(schema)) { - const typeInfo = typeof type === 'string' ? { type } : type; - if (typeInfo.index === false) { - // If index is false, we don't need to create an inverted index for this field. 
- continue; - } - if (typeInfo.type === 'String') { - this.invertedIndex.set(key, new StringInvertedIndex(key)); - } else if (typeInfo.type === 'Integer') { - this.invertedIndex.set(key, new IntegerInvertedIndex(key)); - } else if (typeInfo.type === 'FullText') { - this.invertedIndex.set(key, new FullTextInvertedIndex(key)); - } else if (type === 'Boolean') { - this.invertedIndex.set(key, new BooleanInvertedIndex(key)); - } else { - throw new Error(`Field type '${type}' not supported`); - } - } - } - - private async insert(trx: DataStructRWTransaction, document: Document) { - const exists = await trx - .objectStore('records') - .index('id') - .get(document.id); - - if (exists) { - throw new Error('Document already exists'); - } - - const dataMap = new Map(); - - for (const [key, values] of document.fields) { - const type = this.schema[key as string]; - if (!type) { - continue; - } - const typeInfo = typeof type === 'string' ? { type } : type; - if (typeInfo.store !== false) { - // If store is false, the field will not be stored - dataMap.set(key, values); - } - } - - const nid = await trx.objectStore('records').put({ - id: document.id, - data: dataMap, - }); - - for (const [key, values] of document.fields) { - const iidx = this.invertedIndex.get(key as string); - if (!iidx) { - continue; - } - await iidx.insert(trx, nid, values); - } - } - - private async delete(trx: DataStructRWTransaction, id: string) { - const nid = await trx.objectStore('records').index('id').getKey(id); - - if (nid) { - await trx.objectStore('records').delete(nid); - } else { - return; - } - - const indexIds = await trx - .objectStore('invertedIndex') - .index('nid') - .getAllKeys(nid); - - for (const indexId of indexIds) { - await trx.objectStore('invertedIndex').delete(indexId); - } - } - - async batchWrite( - trx: DataStructRWTransaction, - deletes: string[], - inserts: Document[] - ) { - const startTime = performance.now(); - try { - for (const del of deletes) { - await this.delete(trx, del); - } - for (const inst of inserts) { - await this.insert(trx, inst); - } - } finally { - const endTime = performance.now(); - if (BUILD_CONFIG.debug) { - performance.measure( - `[IndexedDB Indexer] Batch Write (${this.databaseName})`, - { - start: startTime, - end: endTime, - } - ); - } - logger.debug( - `[indexer ${this.databaseName}] batchWrite`, - endTime - startTime, - 'ms' - ); - } - } - - async matchAll(trx: DataStructROTransaction): Promise { - const allNids = await trx.objectStore('records').getAllKeys(); - const match = new Match(); - - for (const nid of allNids) { - match.addScore(nid, 1); - } - return match; - } - - private async queryRaw( - trx: DataStructROTransaction, - query: Query - ): Promise { - if (query.type === 'match') { - const iidx = this.invertedIndex.get(query.field as string); - if (!iidx) { - return new Match(); - } - return await iidx.match(trx, query.match); - } else if (query.type === 'boolean') { - const weights = []; - for (const q of query.queries) { - weights.push(await this.queryRaw(trx, q)); - } - if (query.occur === 'must') { - return weights.reduce((acc, w) => acc.and(w)); - } else if (query.occur === 'must_not') { - const total = weights.reduce((acc, w) => acc.and(w)); - return (await this.matchAll(trx)).exclude(total); - } else if (query.occur === 'should') { - return weights.reduce((acc, w) => acc.or(w)); - } - } else if (query.type === 'all') { - return await this.matchAll(trx); - } else if (query.type === 'boost') { - return (await this.queryRaw(trx, query.query)).boost(query.boost); 
- } else if (query.type === 'exists') { - const iidx = this.invertedIndex.get(query.field as string); - if (!iidx) { - return new Match(); - } - return await iidx.all(trx); - } - throw new Error(`Query type '${query.type}' not supported`); - } - - async clear(trx: DataStructRWTransaction) { - await trx.objectStore('records').clear(); - await trx.objectStore('invertedIndex').clear(); - await trx.objectStore('kvMetadata').clear(); - } - - async search( - trx: DataStructROTransaction, - query: Query, - options: SearchOptions - ): Promise> { - const startTime = performance.now(); - try { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 100, - }; - - const match = await this.queryRaw(trx, query); - - const nids = match - .toArray() - .slice(pagination.skip, pagination.skip + pagination.limit); - - const nodes = []; - for (const nid of nids) { - const record = await trx.objectStore('records').get(nid); - if (!record) { - continue; - } - nodes.push(this.resultNode(record, options, match, nid)); - } - - return { - pagination: { - count: match.size(), - hasMore: match.size() > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - nodes: nodes, - }; - } finally { - const endTime = performance.now(); - if (BUILD_CONFIG.debug) { - performance.measure( - `[IndexedDB Indexer] Search (${this.databaseName})`, - { - detail: { query, options }, - start: startTime, - end: endTime, - } - ); - } - logger.debug( - `[indexer ${this.databaseName}] search`, - endTime - startTime, - 'ms', - query - ); - } - } - - async aggregate( - trx: DataStructROTransaction, - query: Query, - field: string, - options: AggregateOptions - ): Promise> { - const startTime = performance.now(); - try { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 100, - }; - - const hitPagination = options.hits - ? { - skip: options.hits.pagination?.skip ?? 0, - limit: options.hits.pagination?.limit ?? 3, - } - : { - skip: 0, - limit: 0, - }; - - const match = await this.queryRaw(trx, query); - - const nids = match.toArray(); - - const buckets: { - key: string; - nids: number[]; - hits: SearchResult['nodes']; - }[] = []; - - for (const nid of nids) { - const record = await trx.objectStore('records').get(nid); - if (!record) { - continue; - } - const values = record.data.get(field); - for (const value of values ?? []) { - let bucket; - let bucketIndex = buckets.findIndex(b => b.key === value); - if (bucketIndex === -1) { - bucket = { key: value, nids: [], hits: [] }; - buckets.push(bucket); - bucketIndex = buckets.length - 1; - } else { - bucket = buckets[bucketIndex]; - } - - if ( - bucketIndex >= pagination.skip && - bucketIndex < pagination.skip + pagination.limit - ) { - bucket.nids.push(nid); - if ( - bucket.nids.length - 1 >= hitPagination.skip && - bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit - ) { - bucket.hits.push( - this.resultNode(record, options.hits ?? 
{}, match, nid) - ); - } - } - } - } - - return { - buckets: buckets - .slice(pagination.skip, pagination.skip + pagination.limit) - .map(bucket => { - const result = { - key: bucket.key, - score: match.getScore(bucket.nids[0]), - count: bucket.nids.length, - } as AggregateResult['buckets'][number]; - - if (options.hits) { - (result as any).hits = { - pagination: { - count: bucket.nids.length, - hasMore: - bucket.nids.length > - hitPagination.limit + hitPagination.skip, - limit: hitPagination.limit, - skip: hitPagination.skip, - }, - nodes: bucket.hits, - } as SearchResult; - } - - return result; - }), - pagination: { - count: buckets.length, - hasMore: buckets.length > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - }; - } finally { - const endTime = performance.now(); - if (BUILD_CONFIG.debug) { - performance.measure( - `[IndexedDB Indexer] Aggregate (${this.databaseName})`, - { - detail: { query, field, options }, - start: startTime, - end: endTime, - } - ); - } - logger.debug( - `[indexer ${this.databaseName}] aggregate`, - endTime - startTime, - 'ms' - ); - } - } - - async getAll( - trx: DataStructROTransaction, - ids?: string[] - ): Promise { - const docs = []; - if (ids) { - for (const id of ids) { - const record = await trx.objectStore('records').index('id').get(id); - if (record) { - docs.push(Document.from(record.id, record.data)); - } - } - } else { - const records = await trx.objectStore('records').getAll(); - for (const record of records) { - docs.push(Document.from(record.id, record.data)); - } - } - - return docs; - } - - async has(trx: DataStructROTransaction, id: string): Promise { - const nid = await trx.objectStore('records').index('id').getKey(id); - return nid !== undefined; - } - - async readonly() { - await this.ensureInitialized(); - return this.database.transaction( - ['records', 'invertedIndex', 'kvMetadata'], - 'readonly', - { - durability: 'relaxed', - } - ); - } - - async readwrite() { - await this.ensureInitialized(); - return this.database.transaction( - ['records', 'invertedIndex', 'kvMetadata'], - 'readwrite', - { - durability: 'relaxed', - } - ); - } - - private async ensureInitialized() { - if (this.database) { - return; - } - this.initializePromise ??= this.initialize(); - await this.initializePromise; - } - - private async initialize() { - this.database = await openDB(this.databaseName, 1, { - upgrade(database) { - database.createObjectStore('kvMetadata', { - keyPath: 'key', - }); - const recordsStore = database.createObjectStore('records', { - autoIncrement: true, - }); - recordsStore.createIndex('id', 'id', { - unique: true, - }); - const invertedIndexStore = database.createObjectStore('invertedIndex', { - autoIncrement: true, - }); - invertedIndexStore.createIndex('key', 'key', { unique: false }); - invertedIndexStore.createIndex('nid', 'nid', { unique: false }); - }, - }); - } - - private resultNode( - record: { id: string; data: Map }, - options: SearchOptions, - match?: Match, - nid?: number - ): SearchResult['nodes'][number] { - const node = { - id: record.id, - score: match && nid ? match.getScore(nid) : 1, - } as any; - - if (options.fields) { - const fields = {} as Record; - for (const field of options.fields as string[]) { - fields[field] = record.data.get(field) ?? 
['']; - if (fields[field].length === 1) { - fields[field] = fields[field][0]; - } - } - node.fields = fields; - } - - if (match && nid && options.highlights) { - const highlights = {} as Record; - for (const { field, before, end } of options.highlights) { - const highlightValues = match.getHighlighters(nid, field); - if (highlightValues) { - const rawValues = record.data.get(field) ?? []; - highlights[field] = Array.from(highlightValues) - .map(([index, ranges]) => { - const raw = rawValues[index]; - - if (raw) { - return ( - highlighter(raw, before, end, ranges, { - maxPrefix: 20, - maxLength: 50, - }) ?? '' - ); - } - - return ''; - }) - .filter(Boolean); - } - } - node.highlights = highlights; - } - - return node; - } -} diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/index.ts b/packages/common/infra/src/sync/indexer/impl/indexeddb/index.ts deleted file mode 100644 index 269f6df3ec..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/indexeddb/index.ts +++ /dev/null @@ -1,182 +0,0 @@ -import { DebugLogger } from '@affine/debug'; -import type { Observable } from 'rxjs'; -import { merge, of, Subject, throttleTime } from 'rxjs'; - -import { backoffRetry, fromPromise } from '../../../../livedata'; -import { exhaustMapWithTrailing } from '../../../../utils/'; -import { - type AggregateOptions, - type AggregateResult, - type Document, - type Index, - type IndexStorage, - type IndexWriter, - type Query, - type Schema, - type SearchOptions, - type SearchResult, -} from '../../'; -import { DataStruct, type DataStructRWTransaction } from './data-struct'; - -const logger = new DebugLogger('IndexedDBIndex'); - -export class IndexedDBIndex implements Index { - data: DataStruct = new DataStruct(this.databaseName, this.schema); - broadcast$ = new Subject(); - - constructor( - private readonly schema: S, - private readonly databaseName: string = 'indexer' - ) { - const channel = new BroadcastChannel(this.databaseName + ':indexer'); - channel.onmessage = () => { - this.broadcast$.next(1); - }; - } - - async get(id: string): Promise | null> { - return (await this.getAll([id]))[0] ?? 
null; - } - - async getAll(ids: string[]): Promise[]> { - const trx = await this.data.readonly(); - return this.data.getAll(trx, ids); - } - - async write(): Promise> { - return new IndexedDBIndexWriter(this.data, await this.data.readwrite()); - } - - async has(id: string): Promise { - const trx = await this.data.readonly(); - return this.data.has(trx, id); - } - - async search( - query: Query, - options: SearchOptions = {} - ): Promise>> { - const trx = await this.data.readonly(); - return this.data.search(trx, query, options); - } - - search$( - query: Query, - options: SearchOptions = {} - ): Observable>> { - return merge(of(1), this.broadcast$).pipe( - throttleTime(3000, undefined, { leading: true, trailing: true }), - exhaustMapWithTrailing(() => { - return fromPromise(async () => { - try { - const trx = await this.data.readonly(); - return await this.data.search(trx, query, options); - } catch (error) { - logger.error('search error', error); - throw error; - } - }).pipe(backoffRetry()); - }) - ); - } - - async aggregate( - query: Query, - field: string, - options: AggregateOptions = {} - ): Promise>> { - const trx = await this.data.readonly(); - return this.data.aggregate(trx, query, field, options); - } - - aggregate$( - query: Query, - field: string, - options: AggregateOptions = {} - ): Observable>> { - return merge(of(1), this.broadcast$).pipe( - throttleTime(3000, undefined, { leading: true, trailing: true }), - exhaustMapWithTrailing(() => { - return fromPromise(async () => { - try { - const trx = await this.data.readonly(); - return await this.data.aggregate(trx, query, field, options); - } catch (error) { - logger.error('aggregate error', error); - throw error; - } - }).pipe(backoffRetry()); - }) - ); - } - - async clear(): Promise { - const trx = await this.data.readwrite(); - return this.data.clear(trx); - } -} - -export class IndexedDBIndexWriter implements IndexWriter { - inserts: Document[] = []; - deletes: string[] = []; - channel = new BroadcastChannel(this.data.databaseName + ':indexer'); - - constructor( - private readonly data: DataStruct, - private readonly trx: DataStructRWTransaction - ) {} - - async get(id: string): Promise | null> { - return (await this.getAll([id]))[0] ?? 
null; - } - - async getAll(ids?: string[]): Promise[]> { - const trx = await this.data.readonly(); - return this.data.getAll(trx, ids); - } - - insert(document: Document): void { - this.inserts.push(document); - } - delete(id: string): void { - this.deletes.push(id); - } - put(document: Document): void { - this.delete(document.id); - this.insert(document); - } - - async commit(): Promise { - await this.data.batchWrite(this.trx, this.deletes, this.inserts); - this.trx.commit(); - this.channel.postMessage(1); - } - - rollback(): void {} - - has(id: string): Promise { - return this.data.has(this.trx, id); - } - - async search( - query: Query, - options: SearchOptions = {} - ): Promise>> { - return this.data.search(this.trx, query, options); - } - - async aggregate( - query: Query, - field: string, - options: AggregateOptions = {} - ): Promise>> { - return this.data.aggregate(this.trx, query, field, options); - } -} - -export class IndexedDBIndexStorage implements IndexStorage { - constructor(private readonly databaseName: string) {} - getIndex(name: string, s: S): Index { - return new IndexedDBIndex(s, this.databaseName + ':' + name); - } -} diff --git a/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts b/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts deleted file mode 100644 index a22ced413c..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts +++ /dev/null @@ -1,290 +0,0 @@ -import { - type AggregateOptions, - type AggregateResult, - Document, - type Query, - type Schema, - type SearchOptions, - type SearchResult, -} from '../../'; -import { - BooleanInvertedIndex, - FullTextInvertedIndex, - IntegerInvertedIndex, - type InvertedIndex, - StringInvertedIndex, -} from './inverted-index'; -import { Match } from './match'; - -type DataRecord = { - id: string; - data: Map; - deleted: boolean; -}; - -export class DataStruct { - records: DataRecord[] = []; - - idMap = new Map(); - - invertedIndex = new Map(); - - constructor(schema: Schema) { - for (const [key, type] of Object.entries(schema)) { - const typeInfo = typeof type === 'string' ? 
{ type } : type; - - if (typeInfo.type === 'String') { - this.invertedIndex.set(key, new StringInvertedIndex(key)); - } else if (typeInfo.type === 'Integer') { - this.invertedIndex.set(key, new IntegerInvertedIndex(key)); - } else if (typeInfo.type === 'FullText') { - this.invertedIndex.set(key, new FullTextInvertedIndex(key)); - } else if (typeInfo.type === 'Boolean') { - this.invertedIndex.set(key, new BooleanInvertedIndex(key)); - } else { - throw new Error(`Field type '${type}' not supported`); - } - } - } - - getAll(ids?: string[]): Document[] { - if (ids) { - return ids - .map(id => { - const nid = this.idMap.get(id); - if (nid === undefined) { - return undefined; - } - return Document.from(id, this.records[nid].data); - }) - .filter((v): v is Document => v !== undefined); - } else { - return this.records - .filter(record => !record.deleted) - .map(record => Document.from(record.id, record.data)); - } - } - - insert(document: Document) { - if (this.idMap.has(document.id)) { - throw new Error('Document already exists'); - } - - this.records.push({ - id: document.id, - data: document.fields as Map, - deleted: false, - }); - - const nid = this.records.length - 1; - this.idMap.set(document.id, nid); - for (const [key, values] of document.fields) { - for (const value of values) { - const iidx = this.invertedIndex.get(key as string); - if (!iidx) { - throw new Error( - `Inverted index '${key.toString()}' not found, document not match schema` - ); - } - iidx.insert(nid, value); - } - } - } - - delete(id: string) { - const nid = this.idMap.get(id); - if (nid === undefined) { - throw new Error('Document not found'); - } - - this.records[nid].deleted = true; - this.records[nid].data = new Map(); - } - - matchAll(): Match { - const weight = new Match(); - for (let i = 0; i < this.records.length; i++) { - weight.addScore(i, 1); - } - return weight; - } - - clear() { - this.records = []; - this.idMap.clear(); - this.invertedIndex.forEach(v => v.clear()); - } - - private queryRaw(query: Query): Match { - if (query.type === 'match') { - const iidx = this.invertedIndex.get(query.field as string); - if (!iidx) { - throw new Error(`Field '${query.field as string}' not found`); - } - return iidx.match(query.match); - } else if (query.type === 'boolean') { - const weights = query.queries.map(q => this.queryRaw(q)); - if (query.occur === 'must') { - return weights.reduce((acc, w) => acc.and(w)); - } else if (query.occur === 'must_not') { - const total = weights.reduce((acc, w) => acc.and(w)); - return this.matchAll().exclude(total); - } else if (query.occur === 'should') { - return weights.reduce((acc, w) => acc.or(w)); - } - } else if (query.type === 'all') { - return this.matchAll(); - } else if (query.type === 'boost') { - return this.queryRaw(query.query).boost(query.boost); - } else if (query.type === 'exists') { - const iidx = this.invertedIndex.get(query.field as string); - if (!iidx) { - throw new Error(`Field '${query.field as string}' not found`); - } - return iidx.all(); - } - throw new Error(`Query type '${query.type}' not supported`); - } - - query(query: Query): Match { - return this.queryRaw(query).filter(id => !this.records[id].deleted); - } - - search( - query: Query, - options: SearchOptions = {} - ): SearchResult { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 
100, - }; - - const match = this.query(query); - - const nids = match - .toArray() - .slice(pagination.skip, pagination.skip + pagination.limit); - - return { - pagination: { - count: match.size(), - hasMore: match.size() > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - nodes: nids.map(nid => this.resultNode(match, nid, options)), - }; - } - - aggregate( - query: Query, - field: string, - options: AggregateOptions = {} - ): AggregateResult { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 100, - }; - - const match = this.query(query); - - const nids = match.toArray(); - - const buckets: { key: string; nids: number[] }[] = []; - - for (const nid of nids) { - for (const value of this.records[nid].data.get(field) ?? []) { - let bucket = buckets.find(b => b.key === value); - if (!bucket) { - bucket = { key: value, nids: [] }; - buckets.push(bucket); - } - bucket.nids.push(nid); - } - } - - return { - buckets: buckets - .slice(pagination.skip, pagination.skip + pagination.limit) - .map(bucket => { - const result = { - key: bucket.key, - score: match.getScore(bucket.nids[0]), - count: bucket.nids.length, - } as AggregateResult['buckets'][number]; - - if (options.hits) { - const hitsOptions = options.hits; - const pagination = { - skip: options.hits.pagination?.skip ?? 0, - limit: options.hits.pagination?.limit ?? 3, - }; - - const hits = bucket.nids.slice( - pagination.skip, - pagination.skip + pagination.limit - ); - - (result as any).hits = { - pagination: { - count: bucket.nids.length, - hasMore: - bucket.nids.length > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - nodes: hits.map(nid => this.resultNode(match, nid, hitsOptions)), - } as SearchResult; - } - - return result; - }), - pagination: { - count: buckets.length, - hasMore: buckets.length > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - }; - } - - has(id: string): boolean { - return this.idMap.has(id); - } - - private resultNode( - match: Match, - nid: number, - options: SearchOptions - ): SearchResult['nodes'][number] { - const node = { - id: this.records[nid].id, - score: match.getScore(nid), - } as any; - - if (options.fields) { - const fields = {} as Record; - for (const field of options.fields as string[]) { - fields[field] = this.records[nid].data.get(field) ?? 
['']; - if (fields[field].length === 1) { - fields[field] = fields[field][0]; - } - } - node.fields = fields; - } - - if (options.highlights) { - const highlights = {} as Record; - for (const { field, before, end } of options.highlights) { - highlights[field] = match - .getHighlighters(nid, field) - .flatMap(highlighter => { - return highlighter(before, end); - }); - } - node.highlights = highlights; - } - - return node; - } -} diff --git a/packages/common/infra/src/sync/indexer/impl/memory/index.ts b/packages/common/infra/src/sync/indexer/impl/memory/index.ts deleted file mode 100644 index c3bddd9dac..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/memory/index.ts +++ /dev/null @@ -1,141 +0,0 @@ -import { map, merge, type Observable, of, Subject, throttleTime } from 'rxjs'; - -import type { - AggregateOptions, - AggregateResult, - Document, - Index, - IndexStorage, - IndexWriter, - Query, - Schema, - SearchOptions, - SearchResult, -} from '../../'; -import { DataStruct } from './data-struct'; - -export class MemoryIndex implements Index { - private readonly data: DataStruct = new DataStruct(this.schema); - broadcast$ = new Subject(); - - constructor(private readonly schema: Schema) {} - - write(): Promise> { - return Promise.resolve(new MemoryIndexWriter(this.data, this.broadcast$)); - } - - async get(id: string): Promise | null> { - return (await this.getAll([id]))[0] ?? null; - } - - getAll(ids?: string[]): Promise[]> { - return Promise.resolve(this.data.getAll(ids)); - } - - has(id: string): Promise { - return Promise.resolve(this.data.has(id)); - } - - async search( - query: Query, - options: SearchOptions = {} - ): Promise> { - return this.data.search(query, options); - } - - search$( - query: Query, - options: SearchOptions = {} - ): Observable> { - return merge(of(1), this.broadcast$).pipe( - throttleTime(500, undefined, { leading: false, trailing: true }), - map(() => this.data.search(query, options)) - ); - } - - async aggregate( - query: Query, - field: string, - options: AggregateOptions = {} - ): Promise> { - return this.data.aggregate(query, field, options); - } - - aggregate$( - query: Query, - field: string, - options: AggregateOptions = {} - ): Observable>> { - return merge(of(1), this.broadcast$).pipe( - throttleTime(500, undefined, { leading: false, trailing: true }), - map(() => this.data.aggregate(query, field, options)) - ); - } - - clear(): Promise { - this.data.clear(); - return Promise.resolve(); - } -} - -export class MemoryIndexWriter implements IndexWriter { - inserts: Document[] = []; - deletes: string[] = []; - - constructor( - private readonly data: DataStruct, - private readonly broadcast$: Subject - ) {} - - async get(id: string): Promise | null> { - return (await this.getAll([id]))[0] ?? 
null; - } - - getAll(ids: string[]): Promise[]> { - return Promise.resolve(this.data.getAll(ids)); - } - - insert(document: Document): void { - this.inserts.push(document); - } - delete(id: string): void { - this.deletes.push(id); - } - put(document: Document): void { - this.delete(document.id); - this.insert(document); - } - async search( - query: Query, - options: SearchOptions = {} - ): Promise> { - return this.data.search(query, options); - } - async aggregate( - query: Query, - field: string, - options: AggregateOptions = {} - ): Promise> { - return this.data.aggregate(query, field, options); - } - commit(): Promise { - for (const del of this.deletes) { - this.data.delete(del); - } - for (const inst of this.inserts) { - this.data.insert(inst); - } - this.broadcast$.next(1); - return Promise.resolve(); - } - rollback(): void {} - has(id: string): Promise { - return Promise.resolve(this.data.has(id)); - } -} - -export class MemoryIndexStorage implements IndexStorage { - getIndex(_: string, schema: S): Index { - return new MemoryIndex(schema); - } -} diff --git a/packages/common/infra/src/sync/indexer/impl/memory/inverted-index.ts b/packages/common/infra/src/sync/indexer/impl/memory/inverted-index.ts deleted file mode 100644 index 14c17e1dc4..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/memory/inverted-index.ts +++ /dev/null @@ -1,220 +0,0 @@ -import Fuse from 'fuse.js'; - -import { Match } from './match'; - -export interface InvertedIndex { - fieldKey: string; - - match(term: string): Match; - - all(): Match; - - insert(id: number, term: string): void; - - clear(): void; -} - -export class StringInvertedIndex implements InvertedIndex { - index: Map = new Map(); - - constructor(readonly fieldKey: string) {} - - match(term: string): Match { - const match = new Match(); - - for (const id of this.index.get(term) ?? []) { - match.addScore(id, 1); - } - - return match; - } - - all(): Match { - const match = new Match(); - - for (const [_term, ids] of this.index) { - for (const id of ids) { - if (match.getScore(id) === 0) { - match.addScore(id, 1); - } - } - } - - return match; - } - - insert(id: number, term: string): void { - const ids = this.index.get(term) ?? []; - ids.push(id); - this.index.set(term, ids); - } - - clear(): void { - this.index.clear(); - } -} - -export class IntegerInvertedIndex implements InvertedIndex { - index: Map = new Map(); - - constructor(readonly fieldKey: string) {} - - // eslint-disable-next-line sonarjs/no-identical-functions - match(term: string): Match { - const match = new Match(); - - for (const id of this.index.get(term) ?? []) { - match.addScore(id, 1); - } - - return match; - } - - // eslint-disable-next-line sonarjs/no-identical-functions - all(): Match { - const match = new Match(); - - for (const [_term, ids] of this.index) { - for (const id of ids) { - if (match.getScore(id) === 0) { - match.addScore(id, 1); - } - } - } - - return match; - } - - // eslint-disable-next-line sonarjs/no-identical-functions - insert(id: number, term: string): void { - const ids = this.index.get(term) ?? []; - ids.push(id); - this.index.set(term, ids); - } - - clear(): void { - this.index.clear(); - } -} - -export class BooleanInvertedIndex implements InvertedIndex { - index: Map = new Map(); - - constructor(readonly fieldKey: string) {} - - // eslint-disable-next-line sonarjs/no-identical-functions - match(term: string): Match { - const match = new Match(); - - for (const id of this.index.get(term === 'true') ?? 
[]) { - match.addScore(id, 1); - } - - return match; - } - - // eslint-disable-next-line sonarjs/no-identical-functions - all(): Match { - const match = new Match(); - - for (const [_term, ids] of this.index) { - for (const id of ids) { - if (match.getScore(id) === 0) { - match.addScore(id, 1); - } - } - } - - return match; - } - - // eslint-disable-next-line sonarjs/no-identical-functions - insert(id: number, term: string): void { - const ids = this.index.get(term === 'true') ?? []; - ids.push(id); - this.index.set(term === 'true', ids); - } - - clear(): void { - this.index.clear(); - } -} - -export class FullTextInvertedIndex implements InvertedIndex { - records = [] as { id: number; v: string }[]; - index = Fuse.createIndex(['v'], [] as { id: number; v: string }[]); - - constructor(readonly fieldKey: string) {} - - match(term: string): Match { - const searcher = new Fuse( - this.records, - { - includeScore: true, - includeMatches: true, - shouldSort: true, - keys: ['v'], - }, - this.index - ); - const result = searcher.search(term); - - const match = new Match(); - - for (const value of result) { - match.addScore(value.item.id, 1 - (value.score ?? 1)); - - match.addHighlighter(value.item.id, this.fieldKey, (before, after) => { - const matches = value.matches; - if (!matches || matches.length === 0) { - return ['']; - } - - const firstMatch = matches[0]; - - const text = firstMatch.value; - if (!text) { - return ['']; - } - - let result = ''; - let pointer = 0; - for (const match of matches) { - for (const [start, end] of match.indices) { - result += text.substring(pointer, start); - result += `${before}${text.substring(start, end + 1)}${after}`; - pointer = end + 1; - } - } - result += text.substring(pointer); - - return [result]; - }); - } - - return match; - } - - // eslint-disable-next-line sonarjs/no-identical-functions - all(): Match { - const match = new Match(); - - for (const { id } of this.records) { - if (match.getScore(id) === 0) { - match.addScore(id, 1); - } - } - - return match; - } - - insert(id: number, term: string): void { - this.index.add({ id, v: term }); - this.records.push({ id, v: term }); - } - - clear(): void { - this.records = []; - this.index = Fuse.createIndex(['v'], [] as { id: number; v: string }[]); - } -} diff --git a/packages/common/infra/src/sync/indexer/impl/memory/match.ts b/packages/common/infra/src/sync/indexer/impl/memory/match.ts deleted file mode 100644 index 917a62ebab..0000000000 --- a/packages/common/infra/src/sync/indexer/impl/memory/match.ts +++ /dev/null @@ -1,108 +0,0 @@ -export class Match { - scores = new Map(); - highlighters = new Map< - number, - Map string[])[]> - >(); - - constructor() {} - - size() { - return this.scores.size; - } - - getScore(id: number) { - return this.scores.get(id) ?? 0; - } - - addScore(id: number, score: number) { - const currentScore = this.scores.get(id) || 0; - this.scores.set(id, currentScore + score); - } - - getHighlighters(id: number, field: string) { - return this.highlighters.get(id)?.get(field) ?? 
[]; - } - - addHighlighter( - id: number, - field: string, - highlighter: (before: string, after: string) => string[] - ) { - const fields = this.highlighters.get(id) || new Map(); - const highlighters = fields.get(field) || []; - highlighters.push(highlighter); - fields.set(field, highlighters); - this.highlighters.set(id, fields); - } - - and(other: Match) { - const newWeight = new Match(); - for (const [id, score] of this.scores) { - if (other.scores.has(id)) { - newWeight.addScore(id, score + (other.scores.get(id) ?? 0)); - newWeight.copyExtData(this, id); - newWeight.copyExtData(other, id); - } - } - return newWeight; - } - - or(other: Match) { - const newWeight = new Match(); - for (const [id, score] of this.scores) { - newWeight.addScore(id, score); - newWeight.copyExtData(this, id); - } - for (const [id, score] of other.scores) { - newWeight.addScore(id, score); - newWeight.copyExtData(other, id); - } - return newWeight; - } - - exclude(other: Match) { - const newWeight = new Match(); - for (const [id, score] of this.scores) { - if (!other.scores.has(id)) { - newWeight.addScore(id, score); - newWeight.copyExtData(this, id); - } - } - return newWeight; - } - - boost(boost: number) { - const newWeight = new Match(); - for (const [id, score] of this.scores) { - newWeight.addScore(id, score * boost); - newWeight.copyExtData(this, id); - } - return newWeight; - } - - toArray() { - return Array.from(this.scores.entries()) - .sort((a, b) => b[1] - a[1]) - .map(e => e[0]); - } - - filter(predicate: (id: number) => boolean) { - const newWeight = new Match(); - for (const [id, score] of this.scores) { - if (predicate(id)) { - newWeight.addScore(id, score); - newWeight.copyExtData(this, id); - } - } - return newWeight; - } - - private copyExtData(from: Match, id: number) { - for (const [field, highlighters] of from.highlighters.get(id) ?? 
[]) {
-      for (const highlighter of highlighters) {
-        this.addHighlighter(id, field, highlighter);
-      }
-    }
-  }
-}
diff --git a/packages/common/infra/src/sync/indexer/index.ts b/packages/common/infra/src/sync/indexer/index.ts
deleted file mode 100644
index e4cfbadfbc..0000000000
--- a/packages/common/infra/src/sync/indexer/index.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export * from './document';
-export * from './field-type';
-export * from './indexer';
-export * from './query';
-export * from './schema';
-export * from './searcher';
diff --git a/packages/common/infra/src/sync/indexer/indexer.ts b/packages/common/infra/src/sync/indexer/indexer.ts
deleted file mode 100644
index 843ed36655..0000000000
--- a/packages/common/infra/src/sync/indexer/indexer.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import type { Document } from './document';
-import type { Schema } from './schema';
-import type { Searcher, Subscriber } from './searcher';
-
-export interface Index<S extends Schema>
-  extends IndexReader<S>,
-    Searcher<S>,
-    Subscriber<S> {
-  write(): Promise<IndexWriter<S>>;
-
-  clear(): Promise<void>;
-}
-
-export interface IndexWriter<S extends Schema>
-  extends IndexReader<S>,
-    Searcher<S> {
-  insert(document: Document<S>): void;
-
-  put(document: Document<S>): void;
-
-  delete(id: string): void;
-
-  // TODO(@eyhn)
-  // deleteByQuery(query: Query<S>): void;
-
-  commit(): Promise<void>;
-
-  rollback(): void;
-}
-
-export interface IndexReader<S extends Schema> {
-  get(id: string): Promise<Document<S> | null>;
-
-  getAll(ids?: string[]): Promise<Document<S>[]>;
-
-  has(id: string): Promise<boolean>;
-}
-
-export interface IndexStorage {
-  getIndex<S extends Schema>(name: string, schema: S): Index<S>;
-}
diff --git a/packages/common/infra/src/sync/indexer/query.ts b/packages/common/infra/src/sync/indexer/query.ts
deleted file mode 100644
index 921154894e..0000000000
--- a/packages/common/infra/src/sync/indexer/query.ts
+++ /dev/null
@@ -1,35 +0,0 @@
-import type { Schema } from './schema';
-
-export type MatchQuery<S extends Schema> = {
-  type: 'match';
-  field: keyof S;
-  match: string;
-};
-
-export type BoostQuery<S extends Schema> = {
-  type: 'boost';
-  query: Query<S>;
-  boost: number;
-};
-
-export type BooleanQuery<S extends Schema> = {
-  type: 'boolean';
-  occur: 'should' | 'must' | 'must_not';
-  queries: Query<S>[];
-};
-
-export type ExistsQuery<S extends Schema> = {
-  type: 'exists';
-  field: keyof S;
-};
-
-export type AllQuery = {
-  type: 'all';
-};
-
-export type Query<S extends Schema> =
-  | BooleanQuery<S>
-  | MatchQuery<S>
-  | AllQuery
-  | ExistsQuery<S>
-  | BoostQuery<S>;
diff --git a/packages/common/infra/src/sync/indexer/schema.ts b/packages/common/infra/src/sync/indexer/schema.ts
deleted file mode 100644
index 891d8a891b..0000000000
--- a/packages/common/infra/src/sync/indexer/schema.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-import type { FieldType } from './field-type';
-
-export type Schema = Record<
-  string,
-  | FieldType
-  | {
-      type: FieldType;
-      /**
-       * If false, the field will not be indexed, and thus not searchable.
-       *
-       * default: true
-       */
-      index?: boolean;
-      /**
-       * If false, the field will not be stored, and not included in the search result.
-       *
-       * default: true
-       */
-      store?: boolean;
-    }
->;
-
-export function defineSchema<T extends Schema>(schema: T): T {
-  return schema;
-}
diff --git a/packages/common/infra/src/sync/indexer/searcher.ts b/packages/common/infra/src/sync/indexer/searcher.ts
deleted file mode 100644
index dec72ad758..0000000000
--- a/packages/common/infra/src/sync/indexer/searcher.ts
+++ /dev/null
@@ -1,83 +0,0 @@
-import type { Observable } from 'rxjs';
-
-import type { Query } from './query';
-import type { Schema } from './schema';
-
-type HighlightAbleField<S extends Schema> = {
-  [K in keyof S]: S[K] extends 'FullText' ?
K : never;
-}[keyof S];
-
-export interface Searcher<S extends Schema> {
-  search<const O extends SearchOptions<S>>(
-    query: Query<S>,
-    options?: O
-  ): Promise<SearchResult<S, O>>;
-  aggregate<const O extends AggregateOptions<S>>(
-    query: Query<S>,
-    field: keyof S,
-    options?: O
-  ): Promise<AggregateResult<S, O>>;
-}
-
-export interface Subscriber<S extends Schema> {
-  search$<const O extends SearchOptions<S>>(
-    query: Query<S>,
-    options?: O
-  ): Observable<SearchResult<S, O>>;
-  aggregate$<const O extends AggregateOptions<S>>(
-    query: Query<S>,
-    field: keyof S,
-    options?: O
-  ): Observable<AggregateResult<S, O>>;
-}
-
-type ResultPagination = {
-  count: number;
-  limit: number;
-  skip: number;
-  hasMore: boolean;
-};
-
-type PaginationOption = {
-  limit?: number;
-  skip?: number;
-};
-
-export type SearchOptions<S extends Schema> = {
-  pagination?: PaginationOption;
-  highlights?: {
-    field: HighlightAbleField<S>;
-    before: string;
-    end: string;
-  }[];
-  fields?: (keyof S)[];
-};
-
-export type SearchResult<S extends Schema, O extends SearchOptions<S>> = {
-  pagination: ResultPagination;
-  nodes: ({
-    id: string;
-    score: number;
-  } & (O['fields'] extends any[]
-    ? { fields: { [key in O['fields'][number]]: string | string[] } }
-    : unknown) &
-    (O['highlights'] extends any[]
-      ? { highlights: { [key in O['highlights'][number]['field']]: string[] } }
-      : unknown))[];
-};
-
-export interface AggregateOptions<S extends Schema> {
-  pagination?: PaginationOption;
-  hits?: SearchOptions<S>;
-}
-
-export type AggregateResult<S extends Schema, O extends AggregateOptions<S>> = {
-  pagination: ResultPagination;
-  buckets: ({
-    key: string;
-    score: number;
-    count: number;
-  } & (O['hits'] extends object
-    ? { hits: SearchResult<S, O['hits']> }
-    : unknown))[];
-};
diff --git a/packages/common/infra/src/sync/job/README.md b/packages/common/infra/src/sync/job/README.md
deleted file mode 100644
index 30b8b37331..0000000000
--- a/packages/common/infra/src/sync/job/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# job
-
-Job system abstraction for AFFiNE. Currently, only `IndexedDBJobQueue` is implemented; more backends will be implemented in the future.
-
-Runs background jobs in browser and distributed environments. Multiple `runners` can consume jobs concurrently without any extra coordination.
-
-## Basic Usage
-
-```ts
-const queue = new IndexedDBJobQueue('my-queue');
-
-await queue.enqueue([
-  {
-    batchKey: '1',
-    payload: { a: 'hello' },
-  },
-  {
-    batchKey: '2',
-    payload: { a: 'world' },
-  },
-]);
-
-const runner = new JobRunner(queue, job => {
-  console.log(job);
-});
-
-runner.start();
-
-// Output:
-// { batchKey: '1', payload: { a: 'hello' } }
-// { batchKey: '2', payload: { a: 'world' } }
```
-
-## `batchKey`
-
-Each job has a `batchKey`; jobs sharing a `batchKey` are handed to a single `runner` as one batch.
-Additionally, while jobs with a given `batchKey` are in progress, other `runners` will not accept jobs with that `batchKey`, ensuring exclusive resource locking.
-
-> In the future, `batchKey` will be used to implement priority.
-
-## `timeout`
-
-If a job runs for more than 30 seconds, it is considered timed out and will be reassigned to another `runner`.
-
-## Error Handling
-
-If an error is thrown during job execution, the runner will log the error, but the job will still be considered complete.
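
The README above is the only prose description of the `batchKey` and timeout guarantees that the black-box tests below exercise. Here is a minimal sketch of a consumer that leans on both guarantees, using the `IndexedDBJobQueue` and `JobRunner` APIs deleted in this patch; the queue name, payload shape, and relative import paths are illustrative assumptions, not part of the original code:

```ts
// Sketch only: paths, queue name, and payload shape are assumptions.
import { IndexedDBJobQueue } from './impl/indexeddb';
import { JobRunner } from './runner';

const queue = new IndexedDBJobQueue<{ docId: string }>('indexing-jobs');

// Jobs sharing a batchKey are accepted together by a single runner,
// and other runners skip that batchKey while the batch is in flight.
await queue.enqueue([
  { batchKey: 'doc-a', payload: { docId: 'doc-a' } },
  { batchKey: 'doc-a', payload: { docId: 'doc-a' } }, // same batch as above
  { batchKey: 'doc-b', payload: { docId: 'doc-b' } },
]);

const runner = new JobRunner(queue, async (jobs, signal) => {
  // A batch that runs past the 30-second TIMEOUT is treated as stale
  // and may be handed to another runner, so long work should be split.
  for (const job of jobs) {
    if (signal.aborted) throw signal.reason;
    console.log('indexing', job.payload.docId);
  }
});

runner.start();
```
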
diff --git a/packages/common/infra/src/sync/job/__tests__/black-box.spec.ts b/packages/common/infra/src/sync/job/__tests__/black-box.spec.ts deleted file mode 100644 index e9bb944578..0000000000 --- a/packages/common/infra/src/sync/job/__tests__/black-box.spec.ts +++ /dev/null @@ -1,231 +0,0 @@ -/** - * @vitest-environment happy-dom - */ -import 'fake-indexeddb/auto'; - -import { afterEach, beforeEach, describe, expect, test, vitest } from 'vitest'; - -import { IndexedDBJobQueue } from '../impl/indexeddb'; -import type { JobQueue } from '../queue'; - -let queue: JobQueue<{ - a: string; -}> = null!; - -describe.each([{ name: 'idb', backend: IndexedDBJobQueue }])( - 'impl tests($name)', - ({ backend }) => { - beforeEach(async () => { - queue = new backend(); - - await queue.clear(); - - vitest.useFakeTimers({ - toFake: ['Date'], - }); - }); - - afterEach(() => { - vitest.useRealTimers(); - }); - - test('basic', async () => { - await queue.enqueue([ - { - batchKey: '1', - payload: { a: 'hello' }, - }, - { - batchKey: '2', - payload: { a: 'world' }, - }, - ]); - const job1 = await queue.accept(); - const job2 = await queue.accept(); - - expect([job1!, job2!]).toEqual([ - [ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ], - [ - { - id: expect.any(String), - batchKey: '2', - payload: { a: 'world' }, - }, - ], - ]); - - const job3 = await queue.accept(); - expect(job3).toBeNull(); - - await queue.return(job1!); - await queue.return(job2!); - }); - - test('batch', async () => { - await queue.enqueue([ - { - batchKey: '1', - payload: { a: 'hello' }, - }, - { - batchKey: '1', - payload: { a: 'world' }, - }, - ]); - const job1 = await queue.accept(); - - expect(job1).toEqual( - expect.arrayContaining([ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'world' }, - }, - ]) - ); - }); - - test('timeout', async () => { - await queue.enqueue([ - { - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - { - const job = await queue.accept(); - - expect(job).toEqual([ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - } - - { - const job = await queue.accept(); - - expect(job).toBeNull(); - } - - vitest.advanceTimersByTime(1000 * 60 * 60); - - { - const job = await queue.accept(); - - expect(job).toEqual([ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - } - }); - - test('waitForAccept', async () => { - const abort = new AbortController(); - - let result = null as any; - queue.waitForAccept(abort.signal).then(jobs => (result = jobs)); - - await new Promise(resolve => setTimeout(resolve, 100)); - - expect(result).toBeNull(); - - await queue.enqueue([ - { - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - - await vitest.waitFor(() => { - expect(result).toEqual([ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - }); - }); - - test('waitForAccept race', async () => { - const abort = new AbortController(); - - let result1 = null as any; - let result2 = null as any; - queue.waitForAccept(abort.signal).then(jobs => (result1 = jobs)); - queue.waitForAccept(abort.signal).then(jobs => (result2 = jobs)); - - await new Promise(resolve => setTimeout(resolve, 100)); - - expect(result1).toBeNull(); - expect(result2).toBeNull(); - - await queue.enqueue([ - { - batchKey: '1', - payload: { a: 'hello' }, - }, - ]); - - await new Promise(resolve => setTimeout(resolve, 100)); - - 
expect([result1, result2]).toEqual( - expect.arrayContaining([ - [ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ], - null, - ]) - ); - - await queue.enqueue([ - { - batchKey: '2', - payload: { a: 'world' }, - }, - ]); - - await vitest.waitFor(() => { - expect([result1, result2]).toEqual( - expect.arrayContaining([ - [ - { - id: expect.any(String), - batchKey: '1', - payload: { a: 'hello' }, - }, - ], - [ - { - id: expect.any(String), - batchKey: '2', - payload: { a: 'world' }, - }, - ], - ]) - ); - }); - }); - } -); diff --git a/packages/common/infra/src/sync/job/impl/indexeddb/index.ts b/packages/common/infra/src/sync/job/impl/indexeddb/index.ts deleted file mode 100644 index f6215d52d0..0000000000 --- a/packages/common/infra/src/sync/job/impl/indexeddb/index.ts +++ /dev/null @@ -1,257 +0,0 @@ -import type { DBSchema, IDBPDatabase } from 'idb'; -import { openDB } from 'idb'; -import { merge, Observable, of, throttleTime } from 'rxjs'; - -import { fromPromise } from '../../../../livedata'; -import { throwIfAborted } from '../../../../utils'; -import { exhaustMapWithTrailing } from '../../../../utils/'; -import type { Job, JobParams, JobQueue } from '../../'; - -interface IndexDB extends DBSchema { - jobs: { - key: number; - value: JobRecord; - indexes: { - batchKey: string; - }; - }; -} - -interface JobRecord { - batchKey: string; - startTime: number | null; - payload: any; -} - -export class IndexedDBJobQueue implements JobQueue { - database: IDBPDatabase = null as any; - broadcast = new BroadcastChannel('idb-job-queue:' + this.databaseName); - - constructor(private readonly databaseName: string = 'jobs') {} - - async enqueue(jobs: JobParams[]): Promise { - await this.ensureInitialized(); - const trx = this.database.transaction(['jobs'], 'readwrite'); - - for (const job of jobs) { - await trx.objectStore('jobs').add({ - batchKey: job.batchKey, - payload: job.payload, - startTime: null, - }); - } - - trx.commit(); - - // send broadcast to notify new jobs - this.broadcast.postMessage('new-jobs'); - } - - async accept(): Promise { - await this.ensureInitialized(); - const jobs = []; - const trx = this.database.transaction(['jobs'], 'readwrite', { - durability: 'relaxed', - }); - - // if no priority jobs - - if (jobs.length === 0) { - const batchKeys = trx.objectStore('jobs').index('batchKey').iterate(); - - let currentBatchKey: string = null as any; - let currentBatchJobs = []; - let skipCurrentBatch = false; - - for await (const item of batchKeys) { - if (item.value.batchKey !== currentBatchKey) { - if (!skipCurrentBatch && currentBatchJobs.length > 0) { - break; - } - - currentBatchKey = item.value.batchKey; - currentBatchJobs = []; - skipCurrentBatch = false; - } - if (skipCurrentBatch) { - continue; - } - if (this.isAcceptable(item.value)) { - currentBatchJobs.push({ - id: item.primaryKey, - job: item.value, - }); - } else { - skipCurrentBatch = true; - } - } - - if (skipCurrentBatch === false && currentBatchJobs.length > 0) { - jobs.push(...currentBatchJobs); - } - } - - for (const { id, job } of jobs) { - const startTime = Date.now(); - await trx.objectStore('jobs').put({ ...job, startTime }, id); - } - - if (jobs.length === 0) { - return null; - } - - return jobs.map(({ id, job }) => ({ - id: id.toString(), - batchKey: job.batchKey, - payload: job.payload, - })); - } - - async waitForAccept(signal: AbortSignal): Promise[]> { - const broadcast = new BroadcastChannel( - 'idb-job-queue:' + this.databaseName - ); - - try { - let deferred = 
Promise.withResolvers(); - - broadcast.onmessage = () => { - deferred.resolve(); - }; - - while (throwIfAborted(signal)) { - const jobs = await this.accept(); - if (jobs !== null) { - return jobs; - } - - await Promise.race([ - deferred.promise, - new Promise(resolve => { - setTimeout(resolve, 5000); - }), - new Promise((_, reject) => { - // exit if manually stopped - if (signal?.aborted) { - reject(signal.reason); - } - signal?.addEventListener('abort', () => { - reject(signal.reason); - }); - }), - ]); - deferred = Promise.withResolvers(); - } - return []; - } finally { - broadcast.close(); - } - } - - async complete(jobs: Job[]): Promise { - await this.ensureInitialized(); - const trx = this.database.transaction(['jobs'], 'readwrite', { - durability: 'relaxed', - }); - - for (const { id } of jobs) { - await trx - .objectStore('jobs') - .delete(typeof id === 'string' ? parseInt(id) : id); - } - - trx.commit(); - this.broadcast.postMessage('job-completed'); - } - - async return(jobs: Job[], retry: boolean = false): Promise { - await this.ensureInitialized(); - const trx = this.database.transaction(['jobs'], 'readwrite', { - durability: 'relaxed', - }); - - for (const { id } of jobs) { - if (retry) { - const nid = typeof id === 'string' ? parseInt(id) : id; - const job = await trx.objectStore('jobs').get(nid); - if (job) { - await trx.objectStore('jobs').put({ ...job, startTime: null }, nid); - } - } else { - await trx - .objectStore('jobs') - .delete(typeof id === 'string' ? parseInt(id) : id); - } - } - - trx.commit(); - - this.broadcast.postMessage('job-completed'); - } - - async clear(): Promise { - await this.ensureInitialized(); - const trx = this.database.transaction(['jobs'], 'readwrite', { - durability: 'relaxed', - }); - await trx.objectStore('jobs').clear(); - } - - private async ensureInitialized(): Promise { - if (!this.database) { - await this.initialize(); - } - } - - private async initialize(): Promise { - if (this.database) { - return; - } - this.database = await openDB(this.databaseName, 1, { - upgrade(database) { - const jobs = database.createObjectStore('jobs', { - autoIncrement: true, - }); - jobs.createIndex('batchKey', 'batchKey'); - }, - }); - } - - TIMEOUT = 1000 * 30 /* 30 seconds */; - - private isTimeout(job: JobRecord) { - return job.startTime !== null && job.startTime + this.TIMEOUT < Date.now(); - } - - private isAcceptable(job: JobRecord) { - return job.startTime === null || this.isTimeout(job); - } - - get status$() { - return merge( - of(1), - new Observable(subscriber => { - const broadcast = new BroadcastChannel( - 'idb-job-queue:' + this.databaseName - ); - - broadcast.onmessage = () => { - subscriber.next(1); - }; - return () => { - broadcast.close(); - }; - }) - ).pipe( - throttleTime(300, undefined, { leading: true, trailing: true }), - exhaustMapWithTrailing(() => - fromPromise(async () => { - await this.ensureInitialized(); - const trx = this.database.transaction(['jobs'], 'readonly'); - const remaining = await trx.objectStore('jobs').count(); - return { remaining }; - }) - ) - ); - } -} diff --git a/packages/common/infra/src/sync/job/index.ts b/packages/common/infra/src/sync/job/index.ts deleted file mode 100644 index a4c109e47e..0000000000 --- a/packages/common/infra/src/sync/job/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export * from './queue'; -export * from './runner'; diff --git a/packages/common/infra/src/sync/job/queue.ts b/packages/common/infra/src/sync/job/queue.ts deleted file mode 100644 index 588102a904..0000000000 --- 
a/packages/common/infra/src/sync/job/queue.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-import type { Observable } from 'rxjs';
-
-export interface JobParams<Payload = any> {
-  batchKey: string;
-  payload: Payload;
-}
-
-export interface Job<Payload = any> extends JobParams<Payload> {
-  id: string;
-}
-
-export interface JobQueueStatus {
-  remaining: number;
-}
-
-export interface JobQueue<Payload> {
-  enqueue(jobs: JobParams<Payload>[]): Promise<void>;
-
-  accept(): Promise<Job<Payload>[] | null>;
-
-  waitForAccept(signal: AbortSignal): Promise<Job<Payload>[]>;
-
-  return(jobs: Job<Payload>[], retry?: boolean): Promise<void>;
-
-  clear(): Promise<void>;
-
-  status$: Observable<JobQueueStatus>;
-}
diff --git a/packages/common/infra/src/sync/job/runner.ts b/packages/common/infra/src/sync/job/runner.ts
deleted file mode 100644
index d25dc1ba01..0000000000
--- a/packages/common/infra/src/sync/job/runner.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-import { DebugLogger } from '@affine/debug';
-
-import { MANUALLY_STOP, throwIfAborted } from '../../utils';
-import type { Job, JobQueue } from './queue';
-
-const logger = new DebugLogger('job-runner');
-
-export class JobRunner<Payload> {
-  abort: AbortController | null = null;
-
-  constructor(
-    private readonly queue: JobQueue<Payload>,
-    private readonly worker: (
-      jobs: Job<Payload>[],
-      signal: AbortSignal
-    ) => Promise<void>,
-    private readonly interval: () => Promise<void> = async () => {}
-  ) {}
-
-  start() {
-    this.stop();
-    this.abort = new AbortController();
-    this.loop(this.abort.signal).catch(err => {
-      if (err === MANUALLY_STOP) {
-        return;
-      }
-      logger.error(err);
-    });
-  }
-
-  stop() {
-    this.abort?.abort(MANUALLY_STOP);
-    this.abort = null;
-  }
-
-  async loop(signal: AbortSignal) {
-    while (throwIfAborted(signal)) {
-      const jobs = await this.queue.waitForAccept(signal);
-
-      if (jobs !== null) {
-        try {
-          await this.worker(jobs, signal);
-          await this.queue.return(jobs);
-        } catch (err) {
-          if (err === MANUALLY_STOP) {
-            await this.queue.return(jobs, true);
-          } else {
-            // TODO: retry logic
-            await this.queue.return(jobs);
-          }
-          logger.error(
-            'Error processing jobs',
-            err instanceof Error ? (err.stack ??
err.message) : err - ); - } - } else { - await new Promise(resolve => setTimeout(resolve, 1000)); - } - - await this.interval(); - } - } -} diff --git a/packages/common/nbstore/package.json b/packages/common/nbstore/package.json index aa04b9a2c3..b1793ae234 100644 --- a/packages/common/nbstore/package.json +++ b/packages/common/nbstore/package.json @@ -13,12 +13,16 @@ "./idb/v1": "./src/impls/idb/v1/index.ts", "./cloud": "./src/impls/cloud/index.ts", "./sqlite": "./src/impls/sqlite/index.ts", - "./sqlite/v1": "./src/impls/sqlite/v1/index.ts" + "./sqlite/v1": "./src/impls/sqlite/v1/index.ts", + "./sync": "./src/sync/index.ts", + "./frontend": "./src/frontend/index.ts" }, "dependencies": { "@datastructures-js/binary-search-tree": "^5.3.2", "@toeverything/infra": "workspace:*", "eventemitter2": "^6.4.9", + "graphemer": "^1.4.0", + "lib0": "^0.2.99", "lodash-es": "^4.17.21", "nanoid": "^5.0.9", "rxjs": "^7.8.1", @@ -29,6 +33,7 @@ "devDependencies": { "@affine/error": "workspace:*", "@affine/graphql": "workspace:*", + "@blocksuite/affine": "workspace:*", "fake-indexeddb": "^6.0.0", "idb": "^8.0.0", "socket.io-client": "^4.8.1", @@ -37,6 +42,7 @@ "peerDependencies": { "@affine/error": "workspace:*", "@affine/graphql": "workspace:*", + "@blocksuite/affine": "workspace:*", "idb": "^8.0.0", "socket.io-client": "^4.7.5" } diff --git a/packages/common/nbstore/src/frontend/index.ts b/packages/common/nbstore/src/frontend/index.ts index f4829dcdbf..900b489041 100644 --- a/packages/common/nbstore/src/frontend/index.ts +++ b/packages/common/nbstore/src/frontend/index.ts @@ -1,3 +1,4 @@ export * from './awareness'; export * from './blob'; export * from './doc'; +export * from './indexer'; diff --git a/packages/common/nbstore/src/frontend/indexer.ts b/packages/common/nbstore/src/frontend/indexer.ts new file mode 100644 index 0000000000..75862ea492 --- /dev/null +++ b/packages/common/nbstore/src/frontend/indexer.ts @@ -0,0 +1,45 @@ +import type { IndexerStorage } from '../storage'; +import type { IndexerSync } from '../sync/indexer'; + +export class IndexerFrontend { + constructor( + public readonly storage: IndexerStorage, + public readonly sync: IndexerSync + ) {} + + get state$() { + return this.sync.state$; + } + + docState$(docId: string) { + return this.sync.docState$(docId); + } + + search = this.storage.search.bind(this.storage); + aggregate = this.storage.aggregate.bind(this.storage); + // eslint-disable-next-line rxjs/finnish + search$ = this.storage.search$.bind(this.storage); + // eslint-disable-next-line rxjs/finnish + aggregate$ = this.storage.aggregate$.bind(this.storage); + + addPriority(docId: string, priority: number) { + return this.sync.addPriority(docId, priority); + } + + waitForCompleted(signal?: AbortSignal) { + return this.sync.waitForCompleted(signal); + } + + waitForDocCompleted(docId: string, signal?: AbortSignal) { + return this.sync.waitForDocCompleted(docId, signal); + } + + waitForDocCompletedWithPriority( + docId: string, + priority: number, + signal?: AbortSignal + ) { + const undo = this.addPriority(docId, priority); + return this.sync.waitForDocCompleted(docId, signal).finally(() => undo()); + } +} diff --git a/packages/common/nbstore/src/impls/idb/index.ts b/packages/common/nbstore/src/impls/idb/index.ts index de8f2ee724..9ea3f7676b 100644 --- a/packages/common/nbstore/src/impls/idb/index.ts +++ b/packages/common/nbstore/src/impls/idb/index.ts @@ -3,15 +3,26 @@ import { IndexedDBBlobStorage } from './blob'; import { IndexedDBBlobSyncStorage } from './blob-sync'; import { 
IndexedDBDocStorage } from './doc';
 import { IndexedDBDocSyncStorage } from './doc-sync';
+import { IndexedDBIndexerStorage } from './indexer';
+import { IndexedDBIndexerSyncStorage } from './indexer-sync';
 
 export * from './blob';
 export * from './blob-sync';
 export * from './doc';
 export * from './doc-sync';
+export * from './indexer';
+export * from './indexer-sync';
 
 export const idbStorages = [
   IndexedDBDocStorage,
   IndexedDBBlobStorage,
   IndexedDBDocSyncStorage,
   IndexedDBBlobSyncStorage,
+  IndexedDBIndexerStorage,
+  IndexedDBIndexerSyncStorage,
+] satisfies StorageConstructor[];
+
+export const idbStoragesIndexerOnly = [
+  IndexedDBIndexerStorage,
+  IndexedDBIndexerSyncStorage,
 ] satisfies StorageConstructor[];
diff --git a/packages/common/nbstore/src/impls/idb/indexer-sync.ts b/packages/common/nbstore/src/impls/idb/indexer-sync.ts
new file mode 100644
index 0000000000..690fa86078
--- /dev/null
+++ b/packages/common/nbstore/src/impls/idb/indexer-sync.ts
@@ -0,0 +1,38 @@
+import { share } from '../../connection';
+import type { DocClock } from '../../storage/doc';
+import { IndexerSyncStorageBase } from '../../storage/indexer-sync';
+import { IDBConnection, type IDBConnectionOptions } from './db';
+
+export class IndexedDBIndexerSyncStorage extends IndexerSyncStorageBase {
+  static readonly identifier = 'IndexedDBIndexerSyncStorage';
+
+  readonly connection = share(new IDBConnection(this.options));
+
+  constructor(private readonly options: IDBConnectionOptions) {
+    super();
+  }
+
+  async getDocIndexedClock(docId: string): Promise<DocClock | null> {
+    const tx = this.connection.inner.db.transaction('indexerSync', 'readonly');
+    const store = tx.store;
+    const result = await store.get(docId);
+    return result
+      ? { docId: result.docId, timestamp: result.indexedClock }
+      : null;
+  }
+
+  async setDocIndexedClock(docClock: DocClock): Promise<void> {
+    const tx = this.connection.inner.db.transaction('indexerSync', 'readwrite');
+    const store = tx.store;
+    await store.put({
+      docId: docClock.docId,
+      indexedClock: docClock.timestamp,
+    });
+  }
+
+  async clearDocIndexedClock(docId: string): Promise<void> {
+    const tx = this.connection.inner.db.transaction('indexerSync', 'readwrite');
+    const store = tx.store;
+    await store.delete(docId);
+  }
+}
diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/bm25.spec.ts b/packages/common/nbstore/src/impls/idb/indexer/__tests__/bm25.spec.ts
similarity index 100%
rename from packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/bm25.spec.ts
rename to packages/common/nbstore/src/impls/idb/indexer/__tests__/bm25.spec.ts
diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/highlighter.spec.ts b/packages/common/nbstore/src/impls/idb/indexer/__tests__/highlighter.spec.ts
similarity index 100%
rename from packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/highlighter.spec.ts
rename to packages/common/nbstore/src/impls/idb/indexer/__tests__/highlighter.spec.ts
diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/tokenizer.spec.ts b/packages/common/nbstore/src/impls/idb/indexer/__tests__/tokenizer.spec.ts
similarity index 100%
rename from packages/common/infra/src/sync/indexer/impl/indexeddb/__tests__/tokenizer.spec.ts
rename to packages/common/nbstore/src/impls/idb/indexer/__tests__/tokenizer.spec.ts
diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/bm25.ts b/packages/common/nbstore/src/impls/idb/indexer/bm25.ts
similarity index 100%
rename from
packages/common/infra/src/sync/indexer/impl/indexeddb/bm25.ts rename to packages/common/nbstore/src/impls/idb/indexer/bm25.ts diff --git a/packages/common/nbstore/src/impls/idb/indexer/data-struct.ts b/packages/common/nbstore/src/impls/idb/indexer/data-struct.ts new file mode 100644 index 0000000000..ed6d84791e --- /dev/null +++ b/packages/common/nbstore/src/impls/idb/indexer/data-struct.ts @@ -0,0 +1,493 @@ +import { type IDBPDatabase, type IDBPTransaction, type StoreNames } from 'idb'; + +import { + type AggregateOptions, + type AggregateResult, + type IndexerDocument, + type IndexerFieldSchema, + IndexerSchema, + type Query, + type SearchOptions, + type SearchResult, +} from '../../../storage'; +import type { DocStorageSchema } from '../schema'; +import { highlighter } from './highlighter'; +import { + BooleanInvertedIndex, + FullTextInvertedIndex, + IntegerInvertedIndex, + type InvertedIndex, + StringInvertedIndex, +} from './inverted-index'; +import { Match } from './match'; + +export type DataStructRWTransaction = IDBPTransaction< + DocStorageSchema, + ArrayLike>, + 'readwrite' +>; + +export type DataStructROTransaction = IDBPTransaction< + DocStorageSchema, + ArrayLike>, + 'readonly' | 'readwrite' +>; + +export class DataStruct { + database: IDBPDatabase = null as any; + invertedIndex = new Map>(); + + constructor() { + for (const [tableName, table] of Object.entries(IndexerSchema)) { + const tableInvertedIndex = new Map(); + + for (const [fieldName, type] of Object.entries(table)) { + const typeInfo: IndexerFieldSchema = + typeof type === 'string' ? { type } : type; + if ('index' in typeInfo && typeInfo.index === false) { + // If index is false, we don't need to create an inverted index for this field. + continue; + } + if (typeInfo.type === 'String') { + tableInvertedIndex.set( + fieldName, + new StringInvertedIndex(tableName, fieldName) + ); + } else if (typeInfo.type === 'Integer') { + tableInvertedIndex.set( + fieldName, + new IntegerInvertedIndex(tableName, fieldName) + ); + } else if (typeInfo.type === 'FullText') { + tableInvertedIndex.set( + fieldName, + new FullTextInvertedIndex(tableName, fieldName) + ); + } else if (typeInfo.type === 'Boolean') { + tableInvertedIndex.set( + fieldName, + new BooleanInvertedIndex(tableName, fieldName) + ); + } else { + throw new Error(`Field type '${typeInfo.type}' not supported`); + } + } + + this.invertedIndex.set(tableName, tableInvertedIndex); + } + } + + private async update( + trx: DataStructRWTransaction, + table: keyof IndexerSchema, + document: IndexerDocument + ) { + const existsNid = await trx + .objectStore('indexerRecords') + .index('id') + .getKey([table, document.id]); + + const exists = existsNid + ? await trx.objectStore('indexerRecords').get(existsNid) + : null; + + if (!existsNid || !exists) { + // if not exists, return + return; + } + + // delete exists one + await this.deleteByNid(trx, existsNid); + + const dataMap = new Map([...exists.data, ...document.fields]); // merge exists data with new data + const nid = await trx + .objectStore('indexerRecords') + .put({ table, id: document.id, data: dataMap }); + + for (const [key, values] of dataMap) { + const type = IndexerSchema[table][ + key as keyof IndexerSchema[typeof table] + ] as IndexerFieldSchema; + if (!type) { + continue; + } + + const typeInfo = typeof type === 'string' ? 
{ type } : type; + if (typeInfo.index !== false) { + // If index is false, the field will not be indexed + const iidx = this.invertedIndex.get(table)?.get(key); + if (!iidx) { + continue; + } + await iidx.insert(trx, nid, values); + } + } + } + + private async insert( + trx: DataStructRWTransaction, + table: keyof IndexerSchema, + document: IndexerDocument + ) { + const existsNid = await trx + .objectStore('indexerRecords') + .index('id') + .getKey([table, document.id]); + + if (existsNid) { + // delete exists one + await this.deleteByNid(trx, existsNid); + } + + const dataMap = document.fields; + + const nid = await trx + .objectStore('indexerRecords') + .put({ table, id: document.id, data: dataMap }); + + for (const [key, values] of dataMap) { + const type = IndexerSchema[table][ + key as keyof IndexerSchema[typeof table] + ] as IndexerFieldSchema; + if (!type) { + continue; + } + + const typeInfo = typeof type === 'string' ? { type } : type; + if (typeInfo.index !== false) { + // If index is false, the field will not be indexed + const iidx = this.invertedIndex.get(table)?.get(key); + if (!iidx) { + continue; + } + await iidx.insert(trx, nid, values); + } + } + } + + private async deleteByNid(trx: DataStructRWTransaction, nid: number) { + await trx.objectStore('indexerRecords').delete(nid); + + const indexIds = await trx + .objectStore('invertedIndex') + .index('nid') + .getAllKeys(nid); + + for (const indexId of indexIds) { + await trx.objectStore('invertedIndex').delete(indexId); + } + } + + private async delete( + trx: DataStructRWTransaction, + table: keyof IndexerSchema, + id: string + ) { + const nid = await trx + .objectStore('indexerRecords') + .index('id') + .getKey([table, id]); + + if (nid) { + await this.deleteByNid(trx, nid); + } else { + return; + } + } + + async deleteByQuery( + trx: DataStructRWTransaction, + table: keyof IndexerSchema, + query: Query + ) { + const match = await this.queryRaw(trx, table, query); + + for (const nid of match.scores.keys()) { + await this.deleteByNid(trx, nid); + } + } + + async batchWrite( + trx: DataStructRWTransaction, + table: keyof IndexerSchema, + deleteByQueries: Query[], + deletes: string[], + inserts: IndexerDocument[], + updates: IndexerDocument[] + ) { + for (const query of deleteByQueries) { + await this.deleteByQuery(trx, table, query); + } + for (const del of deletes) { + await this.delete(trx, table, del); + } + for (const inst of inserts) { + await this.insert(trx, table, inst); + } + for (const update of updates) { + await this.update(trx, table, update); + } + } + + async matchAll( + trx: DataStructROTransaction, + table: keyof IndexerSchema + ): Promise { + const allNids = await trx + .objectStore('indexerRecords') + .index('table') + .getAllKeys(table); + const match = new Match(); + + for (const nid of allNids) { + match.addScore(nid, 1); + } + return match; + } + + async queryRaw( + trx: DataStructROTransaction, + table: keyof IndexerSchema, + query: Query + ): Promise { + if (query.type === 'match') { + const iidx = this.invertedIndex.get(table)?.get(query.field as string); + if (!iidx) { + return new Match(); + } + return await iidx.match(trx, query.match); + } else if (query.type === 'boolean') { + const weights = []; + for (const q of query.queries) { + weights.push(await this.queryRaw(trx, table, q)); + } + if (query.occur === 'must') { + return weights.reduce((acc, w) => acc.and(w)); + } else if (query.occur === 'must_not') { + const total = weights.reduce((acc, w) => acc.and(w)); + return (await 
this.matchAll(trx, table)).exclude(total); + } else if (query.occur === 'should') { + return weights.reduce((acc, w) => acc.or(w)); + } + } else if (query.type === 'all') { + return await this.matchAll(trx, table); + } else if (query.type === 'boost') { + return (await this.queryRaw(trx, table, query.query)).boost(query.boost); + } else if (query.type === 'exists') { + const iidx = this.invertedIndex.get(table)?.get(query.field as string); + if (!iidx) { + return new Match(); + } + return await iidx.all(trx); + } + throw new Error(`Query type '${query.type}' not supported`); + } + + async clear(trx: DataStructRWTransaction) { + await trx.objectStore('indexerRecords').clear(); + await trx.objectStore('invertedIndex').clear(); + await trx.objectStore('indexerMetadata').clear(); + } + + async search( + trx: DataStructROTransaction, + table: keyof IndexerSchema, + query: Query, + options: SearchOptions = {} + ): Promise> { + const pagination = { + skip: options.pagination?.skip ?? 0, + limit: options.pagination?.limit ?? 100, + }; + + const match = await this.queryRaw(trx, table, query); + + const nids = match + .toArray() + .slice(pagination.skip, pagination.skip + pagination.limit); + + const nodes = []; + for (const nid of nids) { + const record = await trx.objectStore('indexerRecords').get(nid); + if (!record) { + continue; + } + nodes.push(this.resultNode(record, options, match, nid)); + } + + return { + pagination: { + count: match.size(), + hasMore: match.size() > pagination.limit + pagination.skip, + limit: pagination.limit, + skip: pagination.skip, + }, + nodes: nodes, + }; + } + + async aggregate( + trx: DataStructROTransaction, + table: keyof IndexerSchema, + query: Query, + field: string, + options: AggregateOptions = {} + ): Promise> { + const pagination = { + skip: options.pagination?.skip ?? 0, + limit: options.pagination?.limit ?? 100, + }; + + const hitPagination = options.hits + ? { + skip: options.hits.pagination?.skip ?? 0, + limit: options.hits.pagination?.limit ?? 3, + } + : { skip: 0, limit: 0 }; + + const match = await this.queryRaw(trx, table, query); + + const nids = match.toArray(); + + const buckets: { + key: string; + nids: number[]; + hits: SearchResult['nodes']; + }[] = []; + + for (const nid of nids) { + const record = await trx.objectStore('indexerRecords').get(nid); + if (!record) { + continue; + } + const values = record.data.get(field); + for (const value of values ?? []) { + let bucket; + let bucketIndex = buckets.findIndex(b => b.key === value); + if (bucketIndex === -1) { + bucket = { key: value, nids: [], hits: [] }; + buckets.push(bucket); + bucketIndex = buckets.length - 1; + } else { + bucket = buckets[bucketIndex]; + } + + if ( + bucketIndex >= pagination.skip && + bucketIndex < pagination.skip + pagination.limit + ) { + bucket.nids.push(nid); + if ( + bucket.nids.length - 1 >= hitPagination.skip && + bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit + ) { + bucket.hits.push( + this.resultNode(record, options.hits ?? 
{}, match, nid) + ); + } + } + } + } + + return { + buckets: buckets + .slice(pagination.skip, pagination.skip + pagination.limit) + .map(bucket => { + const result = { + key: bucket.key, + score: match.getScore(bucket.nids[0]), + count: bucket.nids.length, + } as AggregateResult['buckets'][number]; + + if (options.hits) { + (result as any).hits = { + pagination: { + count: bucket.nids.length, + hasMore: + bucket.nids.length > hitPagination.limit + hitPagination.skip, + limit: hitPagination.limit, + skip: hitPagination.skip, + }, + nodes: bucket.hits, + } as SearchResult; + } + + return result; + }), + pagination: { + count: buckets.length, + hasMore: buckets.length > pagination.limit + pagination.skip, + limit: pagination.limit, + skip: pagination.skip, + }, + }; + } + + async readonly(database: IDBPDatabase) { + return database.transaction( + ['indexerRecords', 'invertedIndex', 'indexerMetadata'], + 'readonly', + { durability: 'relaxed' } + ); + } + + async readwrite(database: IDBPDatabase) { + return database.transaction( + ['indexerRecords', 'invertedIndex', 'indexerMetadata'], + 'readwrite', + { durability: 'relaxed' } + ); + } + + private resultNode( + record: { id: string; data: Map }, + options: SearchOptions, + match?: Match, + nid?: number + ): SearchResult['nodes'][number] { + const node = { + id: record.id, + score: match && nid ? match.getScore(nid) : 1, + } as any; + + if (options.fields) { + const fields = {} as Record; + for (const field of options.fields as string[]) { + fields[field] = record.data.get(field) ?? ['']; + if (fields[field].length === 1) { + fields[field] = fields[field][0]; + } + } + node.fields = fields; + } + + if (match && nid && options.highlights) { + const highlights = {} as Record; + for (const { field, before, end } of options.highlights) { + const highlightValues = match.getHighlighters(nid, field); + if (highlightValues) { + const rawValues = record.data.get(field) ?? []; + highlights[field] = Array.from(highlightValues) + .map(([index, ranges]) => { + const raw = rawValues[index]; + + if (raw) { + return ( + highlighter(raw, before, end, ranges, { + maxPrefix: 20, + maxLength: 50, + }) ?? 
'' + ); + } + + return ''; + }) + .filter(Boolean); + } + } + node.highlights = highlights; + } + + return node; + } +} diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/highlighter.ts b/packages/common/nbstore/src/impls/idb/indexer/highlighter.ts similarity index 100% rename from packages/common/infra/src/sync/indexer/impl/indexeddb/highlighter.ts rename to packages/common/nbstore/src/impls/idb/indexer/highlighter.ts diff --git a/packages/common/nbstore/src/impls/idb/indexer/index.ts b/packages/common/nbstore/src/impls/idb/indexer/index.ts new file mode 100644 index 0000000000..587db67a16 --- /dev/null +++ b/packages/common/nbstore/src/impls/idb/indexer/index.ts @@ -0,0 +1,203 @@ +import { merge, Observable, of, Subject, throttleTime } from 'rxjs'; + +import type { + AggregateOptions, + AggregateResult, + IndexerDocument, + IndexerSchema, + Query, + SearchOptions, + SearchResult, +} from '../../../storage'; +import { IndexerStorageBase } from '../../../storage'; +import { IDBConnection, type IDBConnectionOptions } from '../db'; +import { DataStruct } from './data-struct'; +import { backoffRetry, exhaustMapWithTrailing, fromPromise } from './utils'; + +export class IndexedDBIndexerStorage extends IndexerStorageBase { + static readonly identifier = 'IndexedDBIndexerStorage'; + readonly connection = new IDBConnection(this.options); + override isReadonly = false; + private readonly data = new DataStruct(); + private readonly tableUpdate$ = new Subject(); + + /** + * The write operations of IndexedDBIndexerStorage are first cached in pendingUpdates, + * and then committed to IndexedDB in a batch through the refresh method. + */ + private readonly pendingUpdates: Record< + keyof IndexerSchema, + { + deleteByQueries: Query[]; + deletes: string[]; + inserts: IndexerDocument[]; + updates: IndexerDocument[]; + } + > = { + doc: { deleteByQueries: [], deletes: [], inserts: [], updates: [] }, + block: { deleteByQueries: [], deletes: [], inserts: [], updates: [] }, + }; + + get channel() { + return this.connection.inner.channel; + } + + get database() { + return this.connection.inner.db; + } + + constructor(private readonly options: IDBConnectionOptions) { + super(); + } + + override async search< + T extends keyof IndexerSchema, + const O extends SearchOptions, + >(table: T, query: Query, options?: O): Promise> { + const trx = await this.data.readonly(this.database); + return this.data.search(trx, table, query, options); + } + override async aggregate< + T extends keyof IndexerSchema, + const O extends AggregateOptions, + >( + table: T, + query: Query, + field: keyof IndexerSchema[T], + options?: O + ): Promise> { + const trx = await this.data.readonly(this.database); + return this.data.aggregate(trx, table, query, field as string, options); + } + override search$< + T extends keyof IndexerSchema, + const O extends SearchOptions, + >(table: T, query: Query, options?: O): Observable> { + return merge(of(1), this.watchTableUpdated(table)).pipe( + throttleTime(3000, undefined, { leading: true, trailing: true }), + exhaustMapWithTrailing(() => { + return fromPromise(async () => { + try { + const trx = await this.data.readonly(this.database); + return await this.data.search(trx, table, query, options); + } catch (error) { + console.error('search error', error); + throw error; + } + }).pipe(backoffRetry()); + }) + ); + } + override aggregate$< + T extends keyof IndexerSchema, + const O extends AggregateOptions, + >( + table: T, + query: Query, + field: keyof IndexerSchema[T], + options?: O 
+ ): Observable> { + return merge(of(1), this.watchTableUpdated(table)).pipe( + throttleTime(3000, undefined, { leading: true, trailing: true }), + exhaustMapWithTrailing(() => { + return fromPromise(async () => { + try { + const trx = await this.data.readonly(this.database); + return await this.data.aggregate( + trx, + table, + query, + field as string, + options + ); + } catch (error) { + console.error('aggregate error', error); + throw error; + } + }).pipe(backoffRetry()); + }) + ); + } + + override async deleteByQuery( + table: T, + query: Query + ): Promise { + this.pendingUpdates[table].deleteByQueries.push(query); + } + + override insert( + table: T, + document: IndexerDocument + ): Promise { + this.pendingUpdates[table].inserts.push(document); + return Promise.resolve(); + } + + override delete( + table: T, + id: string + ): Promise { + this.pendingUpdates[table].deletes.push(id); + return Promise.resolve(); + } + + override update( + table: T, + document: IndexerDocument + ): Promise { + this.pendingUpdates[table].updates.push(document); + return Promise.resolve(); + } + + override async refresh( + table: T + ): Promise { + const trx = await this.data.readwrite(this.database); + const tables = table ? [table] : (['doc', 'block'] as const); + for (const table of tables) { + await this.data.batchWrite( + trx, + table, + this.pendingUpdates[table].deleteByQueries, + this.pendingUpdates[table].deletes, + this.pendingUpdates[table].inserts, + this.pendingUpdates[table].updates + ); + this.pendingUpdates[table] = { + deleteByQueries: [], + deletes: [], + inserts: [], + updates: [], + }; + } + this.emitTableUpdated(table); + } + + private watchTableUpdated(table: keyof IndexerSchema) { + return new Observable(subscriber => { + const listener = (ev: MessageEvent) => { + if (ev.data.type === 'indexer-updated' && ev.data.table === table) { + subscriber.next(1); + } + }; + + const subscription = this.tableUpdate$.subscribe(updatedTable => { + if (updatedTable === table) { + subscriber.next(1); + } + }); + + this.channel.addEventListener('message', listener); + return () => { + this.channel.removeEventListener('message', listener); + subscription.unsubscribe(); + }; + }); + } + + emitTableUpdated(table: keyof IndexerSchema) { + this.tableUpdate$.next(table); + this.channel.postMessage({ type: 'indexer-updated', table }); + } +} diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts b/packages/common/nbstore/src/impls/idb/indexer/inverted-index.ts similarity index 83% rename from packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts rename to packages/common/nbstore/src/impls/idb/indexer/inverted-index.ts index 572e40c054..bed3b542e0 100644 --- a/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts +++ b/packages/common/nbstore/src/impls/idb/indexer/inverted-index.ts @@ -22,16 +22,18 @@ export interface InvertedIndex { export class StringInvertedIndex implements InvertedIndex { constructor( - readonly fieldKey: string, - readonly index: boolean = true, - readonly store: boolean = true + readonly table: string, + readonly fieldKey: string ) {} async match(trx: DataStructROTransaction, term: string): Promise { const objs = await trx .objectStore('invertedIndex') .index('key') - .getAll(InvertedIndexKey.forString(this.fieldKey, term).buffer()); + .getAll([ + this.table, + InvertedIndexKey.forString(this.fieldKey, term).buffer(), + ]); const match = new Match(); for (const obj of objs) { match.addScore(obj.nid, 1); @@ -45,8 +47,11 
@@ export class StringInvertedIndex implements InvertedIndex { .index('key') .getAll( IDBKeyRange.bound( - InvertedIndexKey.forPrefix(this.fieldKey).buffer(), - InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer() + [this.table, InvertedIndexKey.forPrefix(this.fieldKey).buffer()], + [ + this.table, + InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer(), + ] ) ); @@ -65,6 +70,7 @@ export class StringInvertedIndex implements InvertedIndex { async insert(trx: DataStructRWTransaction, id: number, terms: string[]) { for (const term of terms) { await trx.objectStore('invertedIndex').put({ + table: this.table, key: InvertedIndexKey.forString(this.fieldKey, term).buffer(), nid: id, }); @@ -74,16 +80,18 @@ export class StringInvertedIndex implements InvertedIndex { export class IntegerInvertedIndex implements InvertedIndex { constructor( - readonly fieldKey: string, - readonly index: boolean = true, - readonly store: boolean = true + readonly table: string, + readonly fieldKey: string ) {} async match(trx: DataStructROTransaction, term: string): Promise { const objs = await trx .objectStore('invertedIndex') .index('key') - .getAll(InvertedIndexKey.forInt64(this.fieldKey, BigInt(term)).buffer()); + .getAll([ + this.table, + InvertedIndexKey.forInt64(this.fieldKey, BigInt(term)).buffer(), + ]); const match = new Match(); for (const obj of objs) { match.addScore(obj.nid, 1); @@ -98,8 +106,11 @@ export class IntegerInvertedIndex implements InvertedIndex { .index('key') .getAll( IDBKeyRange.bound( - InvertedIndexKey.forPrefix(this.fieldKey).buffer(), - InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer() + [this.table, InvertedIndexKey.forPrefix(this.fieldKey).buffer()], + [ + this.table, + InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer(), + ] ) ); @@ -118,6 +129,7 @@ export class IntegerInvertedIndex implements InvertedIndex { async insert(trx: DataStructRWTransaction, id: number, terms: string[]) { for (const term of terms) { await trx.objectStore('invertedIndex').put({ + table: this.table, key: InvertedIndexKey.forInt64(this.fieldKey, BigInt(term)).buffer(), nid: id, }); @@ -127,9 +139,8 @@ export class IntegerInvertedIndex implements InvertedIndex { export class BooleanInvertedIndex implements InvertedIndex { constructor( - readonly fieldKey: string, - readonly index: boolean = true, - readonly store: boolean = true + readonly table: string, + readonly fieldKey: string ) {} // eslint-disable-next-line sonarjs/no-identical-functions @@ -139,8 +150,11 @@ export class BooleanInvertedIndex implements InvertedIndex { .index('key') .getAll( IDBKeyRange.bound( - InvertedIndexKey.forPrefix(this.fieldKey).buffer(), - InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer() + [this.table, InvertedIndexKey.forPrefix(this.fieldKey).buffer()], + [ + this.table, + InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer(), + ] ) ); @@ -160,9 +174,10 @@ export class BooleanInvertedIndex implements InvertedIndex { const objs = await trx .objectStore('invertedIndex') .index('key') - .getAll( - InvertedIndexKey.forBoolean(this.fieldKey, term === 'true').buffer() - ); + .getAll([ + this.table, + InvertedIndexKey.forBoolean(this.fieldKey, term === 'true').buffer(), + ]); const match = new Match(); for (const obj of objs) { match.addScore(obj.nid, 1); @@ -173,6 +188,7 @@ export class BooleanInvertedIndex implements InvertedIndex { async insert(trx: DataStructRWTransaction, id: number, terms: string[]) { for (const term of terms) { await trx.objectStore('invertedIndex').put({ + table: this.table, 
key: InvertedIndexKey.forBoolean( this.fieldKey, term === 'true' @@ -185,9 +201,8 @@ export class BooleanInvertedIndex implements InvertedIndex { export class FullTextInvertedIndex implements InvertedIndex { constructor( - readonly fieldKey: string, - readonly index: boolean = true, - readonly store: boolean = true + readonly table: string, + readonly fieldKey: string ) {} async match(trx: DataStructROTransaction, term: string): Promise { @@ -205,8 +220,8 @@ export class FullTextInvertedIndex implements InvertedIndex { const avgFieldLength = ( await trx - .objectStore('kvMetadata') - .get(`full-text:avg-field-length:${this.fieldKey}`) + .objectStore('indexerMetadata') + .get(`full-text:avg-field-length:${this.table}:${this.fieldKey}`) )?.value ?? 0; for (const token of queryTokens) { const key = InvertedIndexKey.forString(this.fieldKey, token.term); @@ -214,7 +229,12 @@ export class FullTextInvertedIndex implements InvertedIndex { .objectStore('invertedIndex') .index('key') .getAll( - IDBKeyRange.bound(key.buffer(), key.add1().buffer(), false, true) + IDBKeyRange.bound( + [this.table, key.buffer()], + [this.table, key.add1().buffer()], + false, + true + ) ); const submatched: { nid: number; @@ -316,8 +336,11 @@ export class FullTextInvertedIndex implements InvertedIndex { .index('key') .getAll( IDBKeyRange.bound( - InvertedIndexKey.forPrefix(this.fieldKey).buffer(), - InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer() + [this.table, InvertedIndexKey.forPrefix(this.fieldKey).buffer()], + [ + this.table, + InvertedIndexKey.forPrefix(this.fieldKey).add1().buffer(), + ] ) ); @@ -348,6 +371,7 @@ export class FullTextInvertedIndex implements InvertedIndex { for (const [term, tokens] of tokenMap) { await trx.objectStore('invertedIndex').put({ + table: this.table, key: InvertedIndexKey.forString(this.fieldKey, term).buffer(), nid: id, pos: { @@ -358,23 +382,26 @@ export class FullTextInvertedIndex implements InvertedIndex { }); } - const kvMetadataStore = trx.objectStore('kvMetadata'); + const indexerMetadataStore = trx.objectStore('indexerMetadata'); // update avg-field-length const totalCount = - (await kvMetadataStore.get(`full-text:field-count:${this.fieldKey}`)) - ?.value ?? 0; - const avgFieldLength = ( - await kvMetadataStore.get( - `full-text:avg-field-length:${this.fieldKey}` + await indexerMetadataStore.get( + `full-text:field-count:${this.table}:${this.fieldKey}` ) )?.value ?? 0; - await kvMetadataStore.put({ - key: `full-text:field-count:${this.fieldKey}`, + const avgFieldLength = + ( + await indexerMetadataStore.get( + `full-text:avg-field-length:${this.table}:${this.fieldKey}` + ) + )?.value ?? 
0; + await indexerMetadataStore.put({ + key: `full-text:field-count:${this.table}:${this.fieldKey}`, value: totalCount + 1, }); - await kvMetadataStore.put({ - key: `full-text:avg-field-length:${this.fieldKey}`, + await indexerMetadataStore.put({ + key: `full-text:avg-field-length:${this.table}:${this.fieldKey}`, value: avgFieldLength + (terms.reduce((acc, term) => acc + term.length, 0) - avgFieldLength) / diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/match.ts b/packages/common/nbstore/src/impls/idb/indexer/match.ts similarity index 100% rename from packages/common/infra/src/sync/indexer/impl/indexeddb/match.ts rename to packages/common/nbstore/src/impls/idb/indexer/match.ts diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/tokenizer.ts b/packages/common/nbstore/src/impls/idb/indexer/tokenizer.ts similarity index 100% rename from packages/common/infra/src/sync/indexer/impl/indexeddb/tokenizer.ts rename to packages/common/nbstore/src/impls/idb/indexer/tokenizer.ts diff --git a/packages/common/nbstore/src/impls/idb/indexer/utils.ts b/packages/common/nbstore/src/impls/idb/indexer/utils.ts new file mode 100644 index 0000000000..c30c92d847 --- /dev/null +++ b/packages/common/nbstore/src/impls/idb/indexer/utils.ts @@ -0,0 +1,104 @@ +import { + asyncScheduler, + defer, + exhaustMap, + finalize, + Observable, + type ObservableInput, + type OperatorFunction, + retry, + scheduled, + Subject, + throttle, + throwError, + timer, +} from 'rxjs'; + +import { MANUALLY_STOP } from '../../../utils/throw-if-aborted'; + +/** + * Like exhaustMap, but also includes the trailing value emitted from the source observable while waiting for the preceding inner observable to complete + * + * Original code adapted from https://github.com/ReactiveX/rxjs/issues/5004 + * @param {function(value: T, ?index: number): ObservableInput} project - A function that, when applied to an item emitted by the + * source Observable, returns a projected Observable. + */ +export function exhaustMapWithTrailing( + project: (value: T, index: number) => ObservableInput +): OperatorFunction { + return (source$): Observable => + defer(() => { + const release$ = new Subject(); + return source$.pipe( + throttle(() => release$, { + leading: true, + trailing: true, + }), + exhaustMap((value, index) => + scheduled(project(value, index), asyncScheduler).pipe( + finalize(() => { + release$.next(); + }) + ) + ) + ); + }); +} + +/** + * Convert a promise to an observable. + * + * like `from` but support `AbortSignal`. + */ +export function fromPromise( + promise: Promise | ((signal: AbortSignal) => Promise) +): Observable { + return new Observable(subscriber => { + const abortController = new AbortController(); + + const rawPromise = + promise instanceof Function ? promise(abortController.signal) : promise; + + rawPromise + .then(value => { + subscriber.next(value); + subscriber.complete(); + }) + .catch(error => { + subscriber.error(error); + }); + + return () => abortController.abort(MANUALLY_STOP); + }); +} + +/** + * An operator that retries the source observable when an error occurs. 
+ * + * https://en.wikipedia.org/wiki/Exponential_backoff + */ +export function backoffRetry({ + when, + count = 3, + delay = 200, + maxDelay = 15000, +}: { + when?: (err: any) => boolean; + count?: number; + delay?: number; + maxDelay?: number; +} = {}) { + return (obs$: Observable) => + obs$.pipe( + retry({ + count, + delay: (err, retryIndex) => { + if (when && !when(err)) { + return throwError(() => err); + } + const d = Math.pow(2, retryIndex - 1) * delay; + return timer(Math.min(d, maxDelay)); + }, + }) + ); +} diff --git a/packages/common/nbstore/src/impls/idb/schema.ts b/packages/common/nbstore/src/impls/idb/schema.ts index 4420b72e8e..35e206c2cb 100644 --- a/packages/common/nbstore/src/impls/idb/schema.ts +++ b/packages/common/nbstore/src/impls/idb/schema.ts @@ -37,6 +37,11 @@ Table(PeerClocks) |------|-------|-----------|-----------| | str | str | Date | Date | +Table(IndexerSync) +| docId | clock | +|-------|-------| +| str | Date | + Table(BlobSync) | peer | key | uploadedAt | |------|-----|------------| @@ -124,6 +129,43 @@ export interface DocStorageSchema extends DBSchema { lock: Date; }; }; + indexerSync: { + key: string; + value: { + docId: string; + indexedClock: Date; + }; + }; + indexerMetadata: { + key: string; + value: { + key: string; + value: any; + }; + }; + indexerRecords: { + key: number; + value: { + table: string; + id: string; + data: Map; + }; + indexes: { table: string; id: [string, string] }; + }; + invertedIndex: { + key: number; + value: { + table: string; + nid: number; + pos?: { + i: number /* index */; + l: number /* length */; + rs: [number, number][] /* ranges: [start, end] */; + }; + key: ArrayBuffer; + }; + indexes: { key: [string, ArrayBuffer]; nid: number }; + }; } const migrate: OpenDBCallbacks['upgrade'] = ( @@ -199,11 +241,36 @@ const initBlobSync: Migrate = db => { blobSync.createIndex('peer', 'peer', { unique: false }); }; +const initIndexer: Migrate = db => { + db.createObjectStore('indexerMetadata', { + keyPath: 'key', + }); + const indexRecordsStore = db.createObjectStore('indexerRecords', { + autoIncrement: true, + }); + indexRecordsStore.createIndex('table', 'table', { + unique: false, + }); + indexRecordsStore.createIndex('id', ['table', 'id'], { + unique: true, + }); + const invertedIndexStore = db.createObjectStore('invertedIndex', { + autoIncrement: true, + }); + invertedIndexStore.createIndex('key', ['table', 'key'], { + unique: false, + }); + invertedIndexStore.createIndex('nid', 'nid', { unique: false }); + db.createObjectStore('indexerSync', { + keyPath: 'docId', + autoIncrement: false, + }); +}; // END REGION // 1. all schema changed should be put in migrations // 2. order matters -const migrations: Migrate[] = [init, initBlobSync]; +const migrations: Migrate[] = [init, initBlobSync, initIndexer]; export const migrator = { version: migrations.length, diff --git a/packages/common/nbstore/src/index.ts b/packages/common/nbstore/src/index.ts index ac5f880564..6ef668d4c4 100644 --- a/packages/common/nbstore/src/index.ts +++ b/packages/common/nbstore/src/index.ts @@ -1,5 +1,5 @@ export * from './connection'; -export * from './frontend'; +export type * from './frontend'; // // Only export types. For implementation, please import from '@affine/nbstore/frontend' export * from './storage'; -export * from './sync'; +export type * from './sync'; // Only export types. 
For implementation, please import from '@affine/nbstore/sync' export * from './utils/universal-id'; diff --git a/packages/common/nbstore/src/storage/doc.ts b/packages/common/nbstore/src/storage/doc.ts index 53cd8521e6..b0ecec16a7 100644 --- a/packages/common/nbstore/src/storage/doc.ts +++ b/packages/common/nbstore/src/storage/doc.ts @@ -47,6 +47,7 @@ export interface DocStorageOptions { export interface DocStorage extends Storage { readonly storageType: 'doc'; readonly isReadonly: boolean; + readonly spaceId: string; /** * Get a doc record with latest binary. */ @@ -103,7 +104,7 @@ export abstract class DocStorageBase implements DocStorage { readonly storageType = 'doc'; abstract readonly connection: Connection; protected readonly locker: Locker = new SingletonLocker(); - protected readonly spaceId = this.options.id; + readonly spaceId = this.options.id; constructor(protected readonly options: Opts & DocStorageOptions) {} diff --git a/packages/common/nbstore/src/storage/dummy/indexer-sync.ts b/packages/common/nbstore/src/storage/dummy/indexer-sync.ts new file mode 100644 index 0000000000..96ec17f744 --- /dev/null +++ b/packages/common/nbstore/src/storage/dummy/indexer-sync.ts @@ -0,0 +1,16 @@ +import { DummyConnection } from '../../connection'; +import type { DocClock } from '../doc'; +import { IndexerSyncStorageBase } from '../indexer-sync'; + +export class DummyIndexerSyncStorage extends IndexerSyncStorageBase { + override connection = new DummyConnection(); + override getDocIndexedClock(_docId: string): Promise { + return Promise.resolve(null); + } + override setDocIndexedClock(_docClock: DocClock): Promise { + return Promise.resolve(); + } + override clearDocIndexedClock(_docId: string): Promise { + return Promise.resolve(); + } +} diff --git a/packages/common/nbstore/src/storage/dummy/indexer.ts b/packages/common/nbstore/src/storage/dummy/indexer.ts new file mode 100644 index 0000000000..958151c62a --- /dev/null +++ b/packages/common/nbstore/src/storage/dummy/indexer.ts @@ -0,0 +1,88 @@ +import { NEVER, type Observable } from 'rxjs'; + +import { DummyConnection } from '../../connection'; +import { + type AggregateOptions, + type AggregateResult, + type IndexerDocument, + type IndexerSchema, + IndexerStorageBase, + type Query, + type SearchOptions, + type SearchResult, +} from '../indexer'; + +export class DummyIndexerStorage extends IndexerStorageBase { + readonly isReadonly = true; + readonly connection = new DummyConnection(); + + override search< + T extends keyof IndexerSchema, + const O extends SearchOptions, + >(_table: T, _query: Query, _options?: O): Promise> { + return Promise.resolve({ + pagination: { count: 0, limit: 0, skip: 0, hasMore: false }, + nodes: [], + }); + } + override aggregate< + T extends keyof IndexerSchema, + const O extends AggregateOptions, + >( + _table: T, + _query: Query, + _field: keyof IndexerSchema[T], + _options?: O + ): Promise> { + return Promise.resolve({ + pagination: { count: 0, limit: 0, skip: 0, hasMore: false }, + buckets: [], + }); + } + override search$< + T extends keyof IndexerSchema, + const O extends SearchOptions, + >(_table: T, _query: Query, _options?: O): Observable> { + return NEVER; + } + override aggregate$< + T extends keyof IndexerSchema, + const O extends AggregateOptions, + >( + _table: T, + _query: Query, + _field: keyof IndexerSchema[T], + _options?: O + ): Observable> { + return NEVER; + } + + override deleteByQuery( + _table: T, + _query: Query + ): Promise { + return Promise.resolve(); + } + + override insert( + _table: 
T, + _document: IndexerDocument + ): Promise { + return Promise.resolve(); + } + override delete( + _table: T, + _id: string + ): Promise { + return Promise.resolve(); + } + override update( + _table: T, + _document: IndexerDocument + ): Promise { + return Promise.resolve(); + } + override refresh(_table: T): Promise { + return Promise.resolve(); + } +} diff --git a/packages/common/nbstore/src/storage/index.ts b/packages/common/nbstore/src/storage/index.ts index c15bd76cab..cb26d8d690 100644 --- a/packages/common/nbstore/src/storage/index.ts +++ b/packages/common/nbstore/src/storage/index.ts @@ -10,6 +10,10 @@ import { DummyBlobStorage } from './dummy/blob'; import { DummyBlobSyncStorage } from './dummy/blob-sync'; import { DummyDocStorage } from './dummy/doc'; import { DummyDocSyncStorage } from './dummy/doc-sync'; +import { DummyIndexerStorage } from './dummy/indexer'; +import { DummyIndexerSyncStorage } from './dummy/indexer-sync'; +import type { IndexerStorage } from './indexer'; +import type { IndexerSyncStorage } from './indexer-sync'; import type { StorageType } from './storage'; type Storages = @@ -17,7 +21,9 @@ type Storages = | BlobStorage | BlobSyncStorage | DocSyncStorage - | AwarenessStorage; + | AwarenessStorage + | IndexerStorage + | IndexerSyncStorage; export type SpaceStorageOptions = { [K in StorageType]?: Storages & { storageType: K }; @@ -37,6 +43,8 @@ export class SpaceStorage { blobSync: storages.blobSync ?? new DummyBlobSyncStorage(), doc: storages.doc ?? new DummyDocStorage(), docSync: storages.docSync ?? new DummyDocSyncStorage(), + indexer: storages.indexer ?? new DummyIndexerStorage(), + indexerSync: storages.indexerSync ?? new DummyIndexerSyncStorage(), }; } @@ -83,4 +91,5 @@ export * from './doc'; export * from './doc-sync'; export * from './errors'; export * from './history'; +export * from './indexer'; export * from './storage'; diff --git a/packages/common/nbstore/src/storage/indexer-sync.ts b/packages/common/nbstore/src/storage/indexer-sync.ts new file mode 100644 index 0000000000..8be6c8b5d3 --- /dev/null +++ b/packages/common/nbstore/src/storage/indexer-sync.ts @@ -0,0 +1,21 @@ +import type { Connection } from '../connection'; +import type { DocClock } from './doc'; +import type { Storage } from './storage'; + +export interface IndexerSyncStorage extends Storage { + readonly storageType: 'indexerSync'; + + getDocIndexedClock(docId: string): Promise; + + setDocIndexedClock(docClock: DocClock): Promise; + + clearDocIndexedClock(docId: string): Promise; +} + +export abstract class IndexerSyncStorageBase implements IndexerSyncStorage { + readonly storageType = 'indexerSync'; + abstract connection: Connection; + abstract getDocIndexedClock(docId: string): Promise; + abstract setDocIndexedClock(docClock: DocClock): Promise; + abstract clearDocIndexedClock(docId: string): Promise; +} diff --git a/packages/common/nbstore/src/storage/indexer.ts b/packages/common/nbstore/src/storage/indexer.ts new file mode 100644 index 0000000000..6b9de86db4 --- /dev/null +++ b/packages/common/nbstore/src/storage/indexer.ts @@ -0,0 +1,176 @@ +export * from './indexer/document'; +export * from './indexer/field-type'; +export * from './indexer/query'; +export * from './indexer/schema'; + +import type { Observable } from 'rxjs'; + +import type { Connection } from '../connection'; +import type { IndexerDocument } from './indexer/document'; +import type { Query } from './indexer/query'; +import type { IndexerSchema } from './indexer/schema'; +import type { Storage } from './storage'; + 
+export interface IndexerStorage extends Storage {
+  readonly storageType: 'indexer';
+  readonly isReadonly: boolean;
+
+  search<T extends keyof IndexerSchema, const O extends SearchOptions<T>>(
+    table: T,
+    query: Query<T>,
+    options?: O
+  ): Promise<SearchResult<T, O>>;
+
+  aggregate<T extends keyof IndexerSchema, const O extends AggregateOptions<T>>(
+    table: T,
+    query: Query<T>,
+    field: keyof IndexerSchema[T],
+    options?: O
+  ): Promise<AggregateResult<T, O>>;
+
+  search$<T extends keyof IndexerSchema, const O extends SearchOptions<T>>(
+    table: T,
+    query: Query<T>,
+    options?: O
+  ): Observable<SearchResult<T, O>>;
+
+  aggregate$<
+    T extends keyof IndexerSchema,
+    const O extends AggregateOptions<T>,
+  >(
+    table: T,
+    query: Query<T>,
+    field: keyof IndexerSchema[T],
+    options?: O
+  ): Observable<AggregateResult<T, O>>;
+
+  deleteByQuery<T extends keyof IndexerSchema>(
+    table: T,
+    query: Query<T>
+  ): Promise<void>;
+
+  insert<T extends keyof IndexerSchema>(
+    table: T,
+    document: IndexerDocument<T>
+  ): Promise<void>;
+
+  delete<T extends keyof IndexerSchema>(table: T, id: string): Promise<void>;
+
+  update<T extends keyof IndexerSchema>(
+    table: T,
+    document: IndexerDocument<T>
+  ): Promise<void>;
+
+  refresh<T extends keyof IndexerSchema>(table: T): Promise<void>;
+}
+
+type ResultPagination = {
+  count: number;
+  limit: number;
+  skip: number;
+  hasMore: boolean;
+};
+
+type PaginationOption = { limit?: number; skip?: number };
+
+type HighlightAbleField<T extends keyof IndexerSchema> = {
+  [K in keyof IndexerSchema[T]]: IndexerSchema[T][K] extends 'FullText'
+    ? K
+    : never;
+}[keyof IndexerSchema[T]];
+
+export type SearchOptions<T extends keyof IndexerSchema> = {
+  pagination?: PaginationOption;
+  highlights?: { field: HighlightAbleField<T>; before: string; end: string }[];
+  fields?: (keyof IndexerSchema[T])[];
+};
+
+export type SearchResult<
+  T extends keyof IndexerSchema,
+  O extends SearchOptions<T>,
+> = {
+  pagination: ResultPagination;
+  nodes: ({ id: string; score: number } & (O['fields'] extends any[]
+    ? { fields: { [key in O['fields'][number]]: string | string[] } }
+    : unknown) &
+    (O['highlights'] extends any[]
+      ? { highlights: { [key in O['highlights'][number]['field']]: string[] } }
+      : unknown))[];
+};
+
+export interface AggregateOptions<T extends keyof IndexerSchema> {
+  pagination?: PaginationOption;
+  hits?: SearchOptions<T>;
+}
+
+export type AggregateResult<
+  T extends keyof IndexerSchema,
+  O extends AggregateOptions<T>,
+> = {
+  pagination: ResultPagination;
+  buckets: ({
+    key: string;
+    score: number;
+    count: number;
+  } & (O['hits'] extends object
+    ? { hits: SearchResult<T, NonNullable<O['hits']>> }
+    : unknown))[];
+};
+
+export abstract class IndexerStorageBase implements IndexerStorage {
+  readonly storageType = 'indexer';
+  abstract readonly connection: Connection;
+  abstract readonly isReadonly: boolean;
+
+  abstract search<
+    T extends keyof IndexerSchema,
+    const O extends SearchOptions<T>,
+  >(table: T, query: Query<T>, options?: O): Promise<SearchResult<T, O>>;
+
+  abstract aggregate<
+    T extends keyof IndexerSchema,
+    const O extends AggregateOptions<T>,
+  >(
+    table: T,
+    query: Query<T>,
+    field: keyof IndexerSchema[T],
+    options?: O
+  ): Promise<AggregateResult<T, O>>;
+
+  abstract search$<
+    T extends keyof IndexerSchema,
+    const O extends SearchOptions<T>,
+  >(table: T, query: Query<T>, options?: O): Observable<SearchResult<T, O>>;
+
+  abstract aggregate$<
+    T extends keyof IndexerSchema,
+    const O extends AggregateOptions<T>,
+  >(
+    table: T,
+    query: Query<T>,
+    field: keyof IndexerSchema[T],
+    options?: O
+  ): Observable<AggregateResult<T, O>>;
+
+  abstract deleteByQuery<T extends keyof IndexerSchema>(
+    table: T,
+    query: Query<T>
+  ): Promise<void>;
+
+  abstract insert<T extends keyof IndexerSchema>(
+    table: T,
+    document: IndexerDocument<T>
+  ): Promise<void>;
+
+  abstract delete<T extends keyof IndexerSchema>(
+    table: T,
+    id: string
+  ): Promise<void>;
+
+  abstract update<T extends keyof IndexerSchema>(
+    table: T,
+    document: IndexerDocument<T>
+  ): Promise<void>;
+
+  abstract refresh<T extends keyof IndexerSchema>(table: T): Promise<void>;
+}
diff --git a/packages/common/infra/src/sync/indexer/document.ts b/packages/common/nbstore/src/storage/indexer/document.ts
similarity index 55%
rename from packages/common/infra/src/sync/indexer/document.ts
rename to packages/common/nbstore/src/storage/indexer/document.ts
index e2e22d6994..9e2c14d951 100644
--- a/packages/common/infra/src/sync/indexer/document.ts
+++ b/packages/common/nbstore/src/storage/indexer/document.ts
@@ -1,11 +1,16 @@
-import type { Schema } from './schema';
+import type { IndexerSchema } from './schema';
 
-export class Document<S extends Schema = any> {
+export class IndexerDocument<
+  S extends keyof IndexerSchema = keyof IndexerSchema,
+> {
   constructor(public readonly id: string) {}
 
-  fields = new Map<keyof S, string[]>();
+  fields = new Map<keyof IndexerSchema[S], string[]>();
 
-  public insert<F extends keyof S>(field: F, value: string | string[]) {
+  public insert<F extends keyof IndexerSchema[S]>(
+    field: F,
+    value: string | string[]
+  ) {
     const values = this.fields.get(field) ?? [];
     if (Array.isArray(value)) {
       values.push(...value);
@@ -15,7 +20,9 @@ export class Document<S extends Schema = any> {
     this.fields.set(field, values);
   }
 
-  get<F extends keyof S>(field: F): string[] | string | undefined {
+  get<F extends keyof IndexerSchema[S]>(
+    field: F
+  ): string[] | string | undefined {
     const values = this.fields.get(field);
     if (values === undefined) {
       return undefined;
@@ -26,13 +33,13 @@
   }
 
-  static from<S extends Schema>(
+  static from<S extends keyof IndexerSchema>(
     id: string,
     map:
-      | Partial<Record<keyof S, string | string[]>>
-      | Map<keyof S, string | string[]>
-  ): Document<S> {
-    const doc = new Document<S>(id);
+      | Partial<Record<keyof IndexerSchema[S], string | string[]>>
+      | Map<keyof IndexerSchema[S], string | string[]>
+  ): IndexerDocument<S> {
+    const doc = new IndexerDocument<S>(id);
 
     if (map instanceof Map) {
       for (const [key, value] of map) {
diff --git a/packages/common/nbstore/src/storage/indexer/field-type.ts b/packages/common/nbstore/src/storage/indexer/field-type.ts
new file mode 100644
index 0000000000..202ff7b2ea
--- /dev/null
+++ b/packages/common/nbstore/src/storage/indexer/field-type.ts
@@ -0,0 +1 @@
+export type IndexFieldType = 'Integer' | 'FullText' | 'String' | 'Boolean';
diff --git a/packages/common/nbstore/src/storage/indexer/query.ts b/packages/common/nbstore/src/storage/indexer/query.ts
new file mode 100644
index 0000000000..c7d80b5749
--- /dev/null
+++ b/packages/common/nbstore/src/storage/indexer/query.ts
@@ -0,0 +1,35 @@
+import type { IndexerSchema } from './schema';
+
+export type MatchQuery<T extends keyof IndexerSchema> = {
+  type: 'match';
+  field: keyof IndexerSchema[T];
+  match: string;
+};
+
+export type BoostQuery<T extends keyof IndexerSchema> = {
+  type: 'boost';
+  query: Query<T>;
+  boost: number;
+};
+
+export type BooleanQuery<T extends keyof IndexerSchema> = {
+  type: 'boolean';
+  occur: 'should' | 'must' | 'must_not';
+  queries: Query<T>[];
+};
+
+export type ExistsQuery<T extends keyof IndexerSchema> = {
+  type: 'exists';
+  field: keyof IndexerSchema[T];
+};
+
+export type AllQuery = {
+  type: 'all';
+};
+
+export type Query<T extends keyof IndexerSchema> =
+  | BooleanQuery<T>
+  | MatchQuery<T>
+  | AllQuery
+  | ExistsQuery<T>
+  | BoostQuery<T>;
diff --git a/packages/common/nbstore/src/storage/indexer/schema.ts b/packages/common/nbstore/src/storage/indexer/schema.ts
new file mode 100644
index 0000000000..cf8886e1c5
--- /dev/null
+++ b/packages/common/nbstore/src/storage/indexer/schema.ts
@@ -0,0 +1,51 @@
+import type { IndexFieldType } from './field-type';
+
+export const IndexerSchema = {
+  doc: {
+    docId: 'String',
+    title: 'FullText',
+    // summary of the doc, used for preview
+    summary: { type: 'String', index: false },
+  },
+  block: {
+    docId: 'String',
+    blockId: 'String',
+    content: 'FullText',
+    flavour: 'String',
+    blob: 'String',
+    // reference doc id
+    // ['xxx','yyy']
+    refDocId: 'String',
+    // reference info, used for backlink to specific block
+    // [{"docId":"xxx","mode":"page","blockIds":["gt5Yfq1maYvgNgpi13rIq"]},{"docId":"yyy","mode":"edgeless","blockIds":["k5prpOlDF-9CzfatmO0W7"]}]
+    ref: { type: 'String', index: false },
+    // parent block flavour
+    parentFlavour: 'String',
+    // parent block id
+    parentBlockId: 'String',
+    // additional info
+    // { "databaseName": "xxx", "displayMode": "page/edgeless", "noteBlockId": "xxx" }
+    additional: { type: 'String', index: false },
+    markdownPreview: { type: 'String', index: false },
+  },
+} satisfies Record<string, Record<string, IndexerFieldSchema>>;
+
+export type IndexerFieldSchema =
+  | IndexFieldType
+  | {
+      type: IndexFieldType;
+      /**
+       * If false, the field will not be indexed, and thus not searchable.
+       *
+       * default: true
+       */
+      index?: boolean;
+      /**
+       * If false, the field will not be stored, and not included in the search result.
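As a concrete reading of the `Query` union defined in `query.ts` above, here is a hypothetical composed query (all field values invented) that restricts a full-text match to a single doc and up-weights database blocks:

```ts
import type { Query } from './query'; // sibling module in storage/indexer

// Hypothetical composed query: must be inside one doc, should match the
// text, with database blocks weighted 1.5x.
const query: Query<'block'> = {
  type: 'boolean',
  occur: 'must',
  queries: [
    { type: 'match', field: 'docId', match: 'doc-id-123' },
    {
      type: 'boolean',
      occur: 'should',
      queries: [
        { type: 'match', field: 'content', match: 'kanban' },
        {
          type: 'boost',
          boost: 1.5,
          query: { type: 'match', field: 'flavour', match: 'affine:database' },
        },
      ],
    },
  ],
};
```

In the usual boolean-query reading, `must` filters, `should` contributes to ranking, and `boost` only scales a sub-query's score; nothing here is specific to the IndexedDB backend.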
+ * + * default: true + */ + store?: boolean; + }; + +export type IndexerSchema = typeof IndexerSchema; diff --git a/packages/common/nbstore/src/storage/storage.ts b/packages/common/nbstore/src/storage/storage.ts index dca0e3c115..f37e8588e7 100644 --- a/packages/common/nbstore/src/storage/storage.ts +++ b/packages/common/nbstore/src/storage/storage.ts @@ -1,6 +1,13 @@ import type { Connection } from '../connection'; -export type StorageType = 'blob' | 'blobSync' | 'doc' | 'docSync' | 'awareness'; +export type StorageType = + | 'blob' + | 'blobSync' + | 'doc' + | 'docSync' + | 'awareness' + | 'indexer' + | 'indexerSync'; export interface Storage { readonly storageType: StorageType; diff --git a/packages/common/nbstore/src/sync/blob/peer.ts b/packages/common/nbstore/src/sync/blob/peer.ts index b1b709afc1..3dcdf49ac0 100644 --- a/packages/common/nbstore/src/sync/blob/peer.ts +++ b/packages/common/nbstore/src/sync/blob/peer.ts @@ -420,7 +420,11 @@ class BlobSyncPeerStatus { return () => { dispose.unsubscribe(); }; - }); + }).pipe( + share({ + connector: () => new ReplaySubject(1), + }) + ); } private readonly statusUpdatedSubject$ = new Subject(); diff --git a/packages/common/nbstore/src/sync/index.ts b/packages/common/nbstore/src/sync/index.ts index ad5513dcbe..c659c2ad30 100644 --- a/packages/common/nbstore/src/sync/index.ts +++ b/packages/common/nbstore/src/sync/index.ts @@ -4,10 +4,12 @@ import type { SpaceStorage } from '../storage'; import { AwarenessSyncImpl } from './awareness'; import { BlobSyncImpl } from './blob'; import { DocSyncImpl, type DocSyncState } from './doc'; +import { IndexerSyncImpl } from './indexer'; import type { PeerStorageOptions } from './types'; export type { BlobSyncState } from './blob'; export type { DocSyncDocState, DocSyncState } from './doc'; +export type { IndexerDocSyncState, IndexerSyncState } from './indexer'; export interface SyncState { doc?: DocSyncState; @@ -17,6 +19,7 @@ export class Sync { readonly doc: DocSyncImpl; readonly blob: BlobSyncImpl; readonly awareness: AwarenessSyncImpl; + readonly indexer: IndexerSyncImpl; readonly state$: Observable; @@ -26,6 +29,8 @@ export class Sync { const docSync = storages.local.get('docSync'); const blobSync = storages.local.get('blobSync'); const awareness = storages.local.get('awareness'); + const indexer = storages.local.get('indexer'); + const indexerSync = storages.local.get('indexerSync'); this.doc = new DocSyncImpl( { @@ -60,6 +65,7 @@ export class Sync { ]) ), }); + this.indexer = new IndexerSyncImpl(doc, indexer, indexerSync); this.state$ = this.doc.state$.pipe(map(doc => ({ doc }))); } @@ -67,10 +73,12 @@ export class Sync { start() { this.doc?.start(); this.blob?.start(); + this.indexer?.start(); } stop() { this.doc?.stop(); this.blob?.stop(); + this.indexer?.stop(); } } diff --git a/packages/common/nbstore/src/sync/indexer/crawler.ts b/packages/common/nbstore/src/sync/indexer/crawler.ts new file mode 100644 index 0000000000..769dd99e62 --- /dev/null +++ b/packages/common/nbstore/src/sync/indexer/crawler.ts @@ -0,0 +1,862 @@ +import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; +import { Container } from '@blocksuite/affine/global/di'; +import { + InlineDeltaToMarkdownAdapterExtensions, + MarkdownInlineToDeltaAdapterExtensions, +} from '@blocksuite/affine/inlines/preset'; +import type { + AttachmentBlockModel, + BookmarkBlockModel, + EmbedBlockModel, + ImageBlockModel, + TableBlockModel, +} from '@blocksuite/affine/model'; +import { AffineSchemas } from 
'@blocksuite/affine/schemas'; +import { MarkdownAdapter } from '@blocksuite/affine/shared/adapters'; +import type { AffineTextAttributes } from '@blocksuite/affine/shared/types'; +import { + createYProxy, + type DeltaInsert, + type DraftModel, + Schema, + Transformer, + type TransformerMiddleware, + type YBlock, +} from '@blocksuite/affine/store'; +import { uniq } from 'lodash-es'; +import { + Array as YArray, + type Doc as YDoc, + Map as YMap, + Text as YText, +} from 'yjs'; + +import { IndexerDocument } from '../../storage'; + +const blocksuiteSchema = new Schema(); +blocksuiteSchema.register([...AffineSchemas]); + +interface BlockDocumentInfo { + docId: string; + blockId: string; + content?: string | string[]; + flavour: string; + blob?: string[]; + refDocId?: string[]; + ref?: string[]; + parentFlavour?: string; + parentBlockId?: string; + additional?: { + databaseName?: string; + displayMode?: string; + noteBlockId?: string; + }; + yblock: YMap; + markdownPreview?: string; +} + +const bookmarkFlavours = new Set([ + 'affine:bookmark', + 'affine:embed-youtube', + 'affine:embed-figma', + 'affine:embed-github', + 'affine:embed-loom', +]); + +function generateMarkdownPreviewBuilder( + yRootDoc: YDoc, + workspaceId: string, + blocks: BlockDocumentInfo[] +) { + function yblockToDraftModal(yblock: YBlock): DraftModel | null { + const flavour = yblock.get('sys:flavour') as string; + const blockSchema = blocksuiteSchema.flavourSchemaMap.get(flavour); + if (!blockSchema) { + return null; + } + const keys = Array.from(yblock.keys()) + .filter(key => key.startsWith('prop:')) + .map(key => key.substring(5)); + + const props = Object.fromEntries( + keys.map(key => [key, createYProxy(yblock.get(`prop:${key}`))]) + ); + + return { + props, + id: yblock.get('sys:id') as string, + flavour, + children: [], + role: blockSchema.model.role, + version: (yblock.get('sys:version') as number) ?? blockSchema.version, + keys: Array.from(yblock.keys()) + .filter(key => key.startsWith('prop:')) + .map(key => key.substring(5)), + } as unknown as DraftModel; + } + + const titleMiddleware: TransformerMiddleware = ({ adapterConfigs }) => { + const pages = yRootDoc.getMap('meta').get('pages'); + if (!(pages instanceof YArray)) { + return; + } + for (const meta of pages.toArray()) { + adapterConfigs.set( + 'title:' + meta.get('id'), + meta.get('title')?.toString() ?? 
'Untitled' + ); + } + }; + + const baseUrl = `/workspace/${workspaceId}`; + + function getDocLink(docId: string, blockId: string) { + const searchParams = new URLSearchParams(); + searchParams.set('blockIds', blockId); + return `${baseUrl}/${docId}?${searchParams.toString()}`; + } + + const docLinkBaseURLMiddleware: TransformerMiddleware = ({ + adapterConfigs, + }) => { + adapterConfigs.set('docLinkBaseUrl', baseUrl); + }; + + const container = new Container(); + [ + ...MarkdownInlineToDeltaAdapterExtensions, + ...defaultBlockMarkdownAdapterMatchers, + ...InlineDeltaToMarkdownAdapterExtensions, + ].forEach(ext => { + ext.setup(container); + }); + + const provider = container.provider(); + const markdownAdapter = new MarkdownAdapter( + new Transformer({ + schema: blocksuiteSchema, + blobCRUD: { + delete: () => Promise.resolve(), + get: () => Promise.resolve(null), + list: () => Promise.resolve([]), + set: () => Promise.resolve(''), + }, + docCRUD: { + create: () => { + throw new Error('Not implemented'); + }, + get: () => null, + delete: () => {}, + }, + middlewares: [docLinkBaseURLMiddleware, titleMiddleware], + }), + provider + ); + + const markdownPreviewCache = new WeakMap(); + + function trimCodeBlock(markdown: string) { + const lines = markdown.split('\n').filter(line => line.trim() !== ''); + if (lines.length > 5) { + return [...lines.slice(0, 4), '...', lines.at(-1), ''].join('\n'); + } + return [...lines, ''].join('\n'); + } + + function trimParagraph(markdown: string) { + const lines = markdown.split('\n').filter(line => line.trim() !== ''); + + if (lines.length > 3) { + return [...lines.slice(0, 3), '...', lines.at(-1), ''].join('\n'); + } + + return [...lines, ''].join('\n'); + } + + function getListDepth(block: BlockDocumentInfo) { + let parentBlockCount = 0; + let currentBlock: BlockDocumentInfo | undefined = block; + do { + currentBlock = blocks.find( + b => b.blockId === currentBlock?.parentBlockId + ); + + // reach the root block. do not count it. 
+ if (!currentBlock || currentBlock.flavour !== 'affine:list') { + break; + } + parentBlockCount++; + } while (currentBlock); + return parentBlockCount; + } + + // only works for list block + function indentMarkdown(markdown: string, depth: number) { + if (depth <= 0) { + return markdown; + } + + return ( + markdown + .split('\n') + .map(line => ' '.repeat(depth) + line) + .join('\n') + '\n' + ); + } + + const generateDatabaseMarkdownPreview = (block: BlockDocumentInfo) => { + const isDatabaseBlock = (block: BlockDocumentInfo) => { + return block.flavour === 'affine:database'; + }; + + const model = yblockToDraftModal(block.yblock); + + if (!model) { + return null; + } + + let dbBlock: BlockDocumentInfo | null = null; + + if (isDatabaseBlock(block)) { + dbBlock = block; + } else { + const parentBlock = blocks.find(b => b.blockId === block.parentBlockId); + + if (parentBlock && isDatabaseBlock(parentBlock)) { + dbBlock = parentBlock; + } + } + + if (!dbBlock) { + return null; + } + + const url = getDocLink(block.docId, dbBlock.blockId); + const title = dbBlock.additional?.databaseName; + + return `[database ยท ${title || 'Untitled'}][](${url})\n`; + }; + + const generateImageMarkdownPreview = (block: BlockDocumentInfo) => { + const isImageModel = ( + model: DraftModel | null + ): model is DraftModel => { + return model?.flavour === 'affine:image'; + }; + + const model = yblockToDraftModal(block.yblock); + + if (!isImageModel(model)) { + return null; + } + + const info = ['an image block']; + + if (model.props.sourceId) { + info.push(`file id ${model.props.sourceId}`); + } + + if (model.props.caption) { + info.push(`with caption ${model.props.caption}`); + } + + return info.join(', ') + '\n'; + }; + + const generateEmbedMarkdownPreview = (block: BlockDocumentInfo) => { + const isEmbedModel = ( + model: DraftModel | null + ): model is DraftModel => { + return ( + model?.flavour === 'affine:embed-linked-doc' || + model?.flavour === 'affine:embed-synced-doc' + ); + }; + + const draftModel = yblockToDraftModal(block.yblock); + if (!isEmbedModel(draftModel)) { + return null; + } + + const url = getDocLink(block.docId, draftModel.id); + + return `[](${url})\n`; + }; + + const generateLatexMarkdownPreview = (block: BlockDocumentInfo) => { + let content = + typeof block.content === 'string' + ? block.content.trim() + : block.content?.join('').trim(); + + content = content?.split('\n').join(' ') ?? ''; + + return `LaTeX, with value ${content}\n`; + }; + + const generateBookmarkMarkdownPreview = (block: BlockDocumentInfo) => { + const isBookmarkModel = ( + model: DraftModel | null + ): model is DraftModel => { + return bookmarkFlavours.has(model?.flavour ?? 
''); + }; + + const draftModel = yblockToDraftModal(block.yblock); + if (!isBookmarkModel(draftModel)) { + return null; + } + const title = draftModel.props.title; + const url = draftModel.props.url; + return `[${title}](${url})\n`; + }; + + const generateAttachmentMarkdownPreview = (block: BlockDocumentInfo) => { + const isAttachmentModel = ( + model: DraftModel | null + ): model is DraftModel => { + return model?.flavour === 'affine:attachment'; + }; + + const draftModel = yblockToDraftModal(block.yblock); + if (!isAttachmentModel(draftModel)) { + return null; + } + + return `[${draftModel.props.name}](${draftModel.props.sourceId})\n`; + }; + + const generateTableMarkdownPreview = (block: BlockDocumentInfo) => { + const isTableModel = ( + model: DraftModel | null + ): model is DraftModel => { + return model?.flavour === 'affine:table'; + }; + + const draftModel = yblockToDraftModal(block.yblock); + if (!isTableModel(draftModel)) { + return null; + } + + const url = getDocLink(block.docId, draftModel.id); + + return `[table][](${url})\n`; + }; + + const generateMarkdownPreview = async (block: BlockDocumentInfo) => { + if (markdownPreviewCache.has(block)) { + return markdownPreviewCache.get(block); + } + const flavour = block.flavour; + let markdown: string | null = null; + + if ( + flavour === 'affine:paragraph' || + flavour === 'affine:list' || + flavour === 'affine:code' + ) { + const draftModel = yblockToDraftModal(block.yblock); + markdown = + block.parentFlavour === 'affine:database' + ? generateDatabaseMarkdownPreview(block) + : ((draftModel ? await markdownAdapter.fromBlock(draftModel) : null) + ?.file ?? null); + + if (markdown) { + if (flavour === 'affine:code') { + markdown = trimCodeBlock(markdown); + } else if (flavour === 'affine:paragraph') { + markdown = trimParagraph(markdown); + } + } + } else if (flavour === 'affine:database') { + markdown = generateDatabaseMarkdownPreview(block); + } else if ( + flavour === 'affine:embed-linked-doc' || + flavour === 'affine:embed-synced-doc' + ) { + markdown = generateEmbedMarkdownPreview(block); + } else if (flavour === 'affine:attachment') { + markdown = generateAttachmentMarkdownPreview(block); + } else if (flavour === 'affine:image') { + markdown = generateImageMarkdownPreview(block); + } else if (flavour === 'affine:surface' || flavour === 'affine:page') { + // skip + } else if (flavour === 'affine:latex') { + markdown = generateLatexMarkdownPreview(block); + } else if (bookmarkFlavours.has(flavour)) { + markdown = generateBookmarkMarkdownPreview(block); + } else if (flavour === 'affine:table') { + markdown = generateTableMarkdownPreview(block); + } else { + console.warn(`unknown flavour: ${flavour}`); + } + + if (markdown && flavour === 'affine:list') { + const blockDepth = getListDepth(block); + markdown = indentMarkdown(markdown, Math.max(0, blockDepth)); + } + + markdownPreviewCache.set(block, markdown); + return markdown; + }; + + return generateMarkdownPreview; +} + +// remove the indent of the first line of list +// e.g., +// ``` +// - list item 1 +// - list item 2 +// ``` +// becomes +// ``` +// - list item 1 +// - list item 2 +// ``` +function unindentMarkdown(markdown: string) { + const lines = markdown.split('\n'); + const res: string[] = []; + let firstListFound = false; + let baseIndent = 0; + + for (let current of lines) { + const indent = current.match(/^\s*/)?.[0]?.length ?? 
0; + + if (indent > 0) { + if (!firstListFound) { + // For the first list item, remove all indentation + firstListFound = true; + baseIndent = indent; + current = current.trimStart(); + } else { + // For subsequent list items, maintain relative indentation + current = + ' '.repeat(Math.max(0, indent - baseIndent)) + current.trimStart(); + } + } + + res.push(current); + } + + return res.join('\n'); +} + +export async function crawlingDocData({ + ydoc, + rootYDoc, + spaceId, + docId, +}: { + ydoc: YDoc; + rootYDoc: YDoc; + spaceId: string; + docId: string; +}): Promise<{ + blocks: IndexerDocument<'block'>[]; + preview?: string; +}> { + let docTitle = ''; + let summaryLenNeeded = 1000; + let summary = ''; + const blockDocuments: BlockDocumentInfo[] = []; + + const generateMarkdownPreview = generateMarkdownPreviewBuilder( + rootYDoc, + spaceId, + blockDocuments + ); + + const blocks = ydoc.getMap('blocks'); + + // build a parent map for quick lookup + // for each block, record its parent id + const parentMap: Record = {}; + for (const [id, block] of blocks.entries()) { + const children = block.get('sys:children') as YArray | undefined; + if (children instanceof YArray && children.length) { + for (const child of children) { + parentMap[child] = id; + } + } + } + + if (blocks.size === 0) { + return { blocks: [] }; + } + + // find the nearest block that satisfies the predicate + const nearest = ( + blockId: string, + predicate: (block: YMap) => boolean + ) => { + let current: string | null = blockId; + while (current) { + const block = blocks.get(current); + if (block && predicate(block)) { + return block; + } + current = parentMap[current] ?? null; + } + return null; + }; + + const nearestByFlavour = (blockId: string, flavour: string) => + nearest(blockId, block => block.get('sys:flavour') === flavour); + + let rootBlockId: string | null = null; + for (const block of blocks.values()) { + const flavour = block.get('sys:flavour')?.toString(); + const blockId = block.get('sys:id')?.toString(); + if (flavour === 'affine:page' && blockId) { + rootBlockId = blockId; + } + } + + if (!rootBlockId) { + return { blocks: [] }; + } + + const queue: { parent?: string; id: string }[] = [{ id: rootBlockId }]; + const visited = new Set(); // avoid loop + + const pushChildren = (id: string, block: YMap) => { + const children = block.get('sys:children'); + if (children instanceof YArray && children.length) { + for (let i = children.length - 1; i >= 0; i--) { + const childId = children.get(i); + if (childId && !visited.has(childId)) { + queue.push({ parent: id, id: childId }); + visited.add(childId); + } + } + } + }; + + // #region first loop - generate block base info + while (queue.length) { + const next = queue.pop(); + if (!next) { + break; + } + + const { parent: parentBlockId, id: blockId } = next; + const block = blockId ? blocks.get(blockId) : null; + const parentBlock = parentBlockId ? blocks.get(parentBlockId) : null; + if (!block) { + break; + } + + const flavour = block.get('sys:flavour')?.toString(); + const parentFlavour = parentBlock?.get('sys:flavour')?.toString(); + const noteBlock = nearestByFlavour(blockId, 'affine:note'); + + // display mode: + // - both: page and edgeless -> fallback to page + // - page: only page -> page + // - edgeless: only edgeless -> edgeless + // - undefined: edgeless (assuming it is a normal element on the edgeless) + let displayMode = noteBlock?.get('prop:displayMode') ?? 
'edgeless'; + + if (displayMode === 'both') { + displayMode = 'page'; + } + + const noteBlockId: string | undefined = noteBlock + ?.get('sys:id') + ?.toString(); + + pushChildren(blockId, block); + + const commonBlockProps = { + docId: ydoc.guid, + flavour, + blockId, + yblock: block, + additional: { displayMode, noteBlockId }, + }; + + if (flavour === 'affine:page') { + docTitle = block.get('prop:title').toString(); + blockDocuments.push({ ...commonBlockProps, content: docTitle }); + } else if ( + flavour === 'affine:paragraph' || + flavour === 'affine:list' || + flavour === 'affine:code' + ) { + const text = block.get('prop:text') as YText; + + if (!text) { + continue; + } + + const deltas: DeltaInsert[] = text.toDelta(); + const refs = uniq( + deltas + .flatMap(delta => { + if ( + delta.attributes && + delta.attributes.reference && + delta.attributes.reference.pageId + ) { + const { pageId: refDocId, params = {} } = + delta.attributes.reference; + return { + refDocId, + ref: JSON.stringify({ docId: refDocId, ...params }), + }; + } + return null; + }) + .filter(ref => !!ref) + ); + + const databaseName = + flavour === 'affine:paragraph' && parentFlavour === 'affine:database' // if block is a database row + ? parentBlock?.get('prop:title')?.toString() + : undefined; + + blockDocuments.push({ + ...commonBlockProps, + content: text.toString(), + ...refs.reduce<{ refDocId: string[]; ref: string[] }>( + (prev, curr) => { + prev.refDocId.push(curr.refDocId); + prev.ref.push(curr.ref); + return prev; + }, + { refDocId: [], ref: [] } + ), + parentFlavour, + parentBlockId, + additional: { ...commonBlockProps.additional, databaseName }, + }); + + if (summaryLenNeeded > 0) { + summary += text.toString(); + summaryLenNeeded -= text.length; + } + } else if ( + flavour === 'affine:embed-linked-doc' || + flavour === 'affine:embed-synced-doc' + ) { + const pageId = block.get('prop:pageId'); + if (typeof pageId === 'string') { + // reference info + const params = block.get('prop:params') ?? 
{}; + blockDocuments.push({ + ...commonBlockProps, + refDocId: [pageId], + ref: [JSON.stringify({ docId: pageId, ...params })], + parentFlavour, + parentBlockId, + }); + } + } else if (flavour === 'affine:attachment' || flavour === 'affine:image') { + const blobId = block.get('prop:sourceId'); + if (typeof blobId === 'string') { + blockDocuments.push({ + ...commonBlockProps, + blob: [blobId], + parentFlavour, + parentBlockId, + }); + } + } else if (flavour === 'affine:surface') { + const texts = []; + + const elementsObj = block.get('prop:elements'); + if ( + !( + elementsObj instanceof YMap && + elementsObj.get('type') === '$blocksuite:internal:native$' + ) + ) { + continue; + } + const elements = elementsObj.get('value') as YMap; + if (!(elements instanceof YMap)) { + continue; + } + + for (const element of elements.values()) { + if (!(element instanceof YMap)) { + continue; + } + const text = element.get('text') as YText; + if (!text) { + continue; + } + + texts.push(text.toString()); + } + + blockDocuments.push({ + ...commonBlockProps, + content: texts, + parentFlavour, + parentBlockId, + }); + } else if (flavour === 'affine:database') { + const texts = []; + const columnsObj = block.get('prop:columns'); + const databaseTitle = block.get('prop:title'); + if (databaseTitle instanceof YText) { + texts.push(databaseTitle.toString()); + } + if (columnsObj instanceof YArray) { + for (const column of columnsObj) { + if (!(column instanceof YMap)) { + continue; + } + if (typeof column.get('name') === 'string') { + texts.push(column.get('name')); + } + + const data = column.get('data'); + if (!(data instanceof YMap)) { + continue; + } + const options = data.get('options'); + if (!(options instanceof YArray)) { + continue; + } + for (const option of options) { + if (!(option instanceof YMap)) { + continue; + } + const value = option.get('value'); + if (typeof value === 'string') { + texts.push(value); + } + } + } + } + + blockDocuments.push({ + ...commonBlockProps, + content: texts, + additional: { + ...commonBlockProps.additional, + databaseName: databaseTitle?.toString(), + }, + }); + } else if (flavour === 'affine:latex') { + blockDocuments.push({ + ...commonBlockProps, + content: block.get('prop:latex')?.toString() ?? '', + }); + } else if (flavour === 'affine:table') { + const contents = Array.from(block.keys()) + .map(key => { + if (key.startsWith('prop:cells.') && key.endsWith('.text')) { + return block.get(key)?.toString() ?? ''; + } + return ''; + }) + .filter(Boolean); + blockDocuments.push({ + ...commonBlockProps, + content: contents, + }); + } else if (bookmarkFlavours.has(flavour)) { + blockDocuments.push({ ...commonBlockProps }); + } + } + // #endregion + + // #region second loop - generate markdown preview + const TARGET_PREVIEW_CHARACTER = 500; + const TARGET_PREVIOUS_BLOCK = 1; + const TARGET_FOLLOW_BLOCK = 4; + for (const block of blockDocuments) { + if (block.ref?.length) { + const target = block; + + // should only generate the markdown preview belong to the same affine:note + const noteBlock = nearestByFlavour(block.blockId, 'affine:note'); + + const sameNoteBlocks = noteBlock + ? blockDocuments.filter( + candidate => + nearestByFlavour(candidate.blockId, 'affine:note') === noteBlock + ) + : []; + + // only generate markdown preview for reference blocks + let previewText = (await generateMarkdownPreview(target)) ?? 
''; + let previousBlock = 0; + let followBlock = 0; + let previousIndex = sameNoteBlocks.findIndex( + block => block.blockId === target.blockId + ); + let followIndex = previousIndex; + + while ( + !( + ( + previewText.length > TARGET_PREVIEW_CHARACTER || // stop if preview text reaches the limit + ((previousBlock >= TARGET_PREVIOUS_BLOCK || previousIndex < 0) && + (followBlock >= TARGET_FOLLOW_BLOCK || + followIndex >= sameNoteBlocks.length)) + ) // stop if no more blocks, or preview block reaches the limit + ) + ) { + if (previousBlock < TARGET_PREVIOUS_BLOCK) { + previousIndex--; + const block = + previousIndex >= 0 ? sameNoteBlocks.at(previousIndex) : null; + const markdown = block ? await generateMarkdownPreview(block) : null; + if ( + markdown && + !previewText.startsWith( + markdown + ) /* A small hack to skip blocks with the same content */ + ) { + previewText = markdown + '\n' + previewText; + previousBlock++; + } + } + + if (followBlock < TARGET_FOLLOW_BLOCK) { + followIndex++; + const block = sameNoteBlocks.at(followIndex); + const markdown = block ? await generateMarkdownPreview(block) : null; + if ( + markdown && + !previewText.endsWith( + markdown + ) /* A small hack to skip blocks with the same content */ + ) { + previewText = previewText + '\n' + markdown; + followBlock++; + } + } + } + + block.markdownPreview = unindentMarkdown(previewText); + } + } + // #endregion + + return { + blocks: blockDocuments.map(block => + IndexerDocument.from<'block'>(`${docId}:${block.blockId}`, { + docId: block.docId, + blockId: block.blockId, + content: block.content, + flavour: block.flavour, + blob: block.blob, + refDocId: block.refDocId, + ref: block.ref, + parentFlavour: block.parentFlavour, + parentBlockId: block.parentBlockId, + additional: block.additional + ? JSON.stringify(block.additional) + : undefined, + markdownPreview: block.markdownPreview, + }) + ), + preview: summary, + }; +} diff --git a/packages/common/nbstore/src/sync/indexer/index.ts b/packages/common/nbstore/src/sync/indexer/index.ts new file mode 100644 index 0000000000..ff453f54ee --- /dev/null +++ b/packages/common/nbstore/src/sync/indexer/index.ts @@ -0,0 +1,579 @@ +import { + filter, + first, + Observable, + ReplaySubject, + share, + Subject, + throttleTime, +} from 'rxjs'; +import { + applyUpdate, + type Array as YArray, + Doc as YDoc, + type Map as YMap, +} from 'yjs'; + +import { + type DocStorage, + IndexerDocument, + type IndexerStorage, +} from '../../storage'; +import type { IndexerSyncStorage } from '../../storage/indexer-sync'; +import { AsyncPriorityQueue } from '../../utils/async-priority-queue'; +import { takeUntilAbort } from '../../utils/take-until-abort'; +import { MANUALLY_STOP, throwIfAborted } from '../../utils/throw-if-aborted'; +import { crawlingDocData } from './crawler'; + +export interface IndexerSyncState { + /** + * Number of documents currently in the indexing queue + */ + indexing: number; + /** + * Indicates whether all documents have been successfully indexed + * + * This is only for UI display purposes. For logical operations, please use `waitForCompleted()` + */ + completed: boolean; + /** + * Total number of documents in the workspace + */ + total: number; + errorMessage: string | null; +} + +export interface IndexerDocSyncState { + /** + * Indicates whether this document is currently in the indexing queue + */ + indexing: boolean; + /** + * Indicates whether this document has been successfully indexed + * + * This is only for UI display purposes. 
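A hypothetical consumer of the two completion signals described in these comments; `indexerSync` stands for any `IndexerSync` implementation:

```ts
declare const indexerSync: IndexerSync;

// Logical wait: block until the whole workspace is indexed, give up after 30s.
await indexerSync.waitForCompleted(AbortSignal.timeout(30_000));

// UI wait: observe a single doc's indexing status instead.
const sub = indexerSync.docState$('doc-id-123').subscribe(state => {
  console.log(state.indexing ? 'indexing' : state.completed ? 'done' : 'pending');
});
sub.unsubscribe();
```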
For logical operations, please use `waitForDocCompleted()` + */ + completed: boolean; +} + +export interface IndexerSync { + state$: Observable; + docState$(docId: string): Observable; + addPriority(docId: string, priority: number): () => void; + waitForCompleted(signal?: AbortSignal): Promise; + waitForDocCompleted(docId: string, signal?: AbortSignal): Promise; +} + +export class IndexerSyncImpl implements IndexerSync { + private abort: AbortController | null = null; + private readonly rootDocId = this.doc.spaceId; + private readonly status = new IndexerSyncStatus(this.rootDocId); + + state$ = this.status.state$.pipe( + // throttle the state to 1 second to avoid spamming the UI + throttleTime(1000) + ); + docState$(docId: string) { + return this.status.docState$(docId).pipe( + // throttle the state to 1 second to avoid spamming the UI + throttleTime(1000) + ); + } + + waitForCompleted(signal?: AbortSignal) { + return new Promise((resolve, reject) => { + this.status.state$ + .pipe( + filter(state => state.completed), + takeUntilAbort(signal), + first() + ) + .subscribe({ + next: () => { + resolve(); + }, + error: err => { + reject(err); + }, + }); + }); + } + + waitForDocCompleted(docId: string, signal?: AbortSignal) { + return new Promise((resolve, reject) => { + this.status + .docState$(docId) + .pipe( + filter(state => state.completed), + takeUntilAbort(signal), + first() + ) + .subscribe({ + next: () => { + resolve(); + }, + error: err => { + reject(err); + }, + }); + }); + } + + readonly interval = () => + new Promise(resolve => + requestIdleCallback(() => resolve(), { + timeout: 200, + }) + ); + + constructor( + readonly doc: DocStorage, + readonly indexer: IndexerStorage, + readonly indexerSync: IndexerSyncStorage + ) {} + + start() { + if (this.abort) { + this.abort.abort(MANUALLY_STOP); + } + + const abort = new AbortController(); + this.abort = abort; + + this.mainLoop(abort.signal).catch(error => { + if (error === MANUALLY_STOP) { + return; + } + console.error('index error', error); + }); + } + + stop() { + this.abort?.abort(MANUALLY_STOP); + this.abort = null; + } + + addPriority(id: string, priority: number) { + return this.status.addPriority(id, priority); + } + + private async mainLoop(signal?: AbortSignal) { + if (this.indexer.isReadonly) { + return; + } + + while (true) { + try { + await this.retryLoop(signal); + } catch (error) { + if (signal?.aborted) { + return; + } + console.error('index error, retry in 5s', error); + this.status.errorMessage = + error instanceof Error ? 
error.message : `${error}`; + this.status.statusUpdatedSubject$.next(true); + } finally { + // reset all status + this.status.reset(); + // wait for 5s before next retry + await Promise.race([ + new Promise(resolve => { + setTimeout(resolve, 5000); + }), + new Promise((_, reject) => { + // exit if manually stopped + if (signal?.aborted) { + reject(signal.reason); + } + signal?.addEventListener('abort', () => { + reject(signal.reason); + }); + }), + ]); + } + } + } + + private async retryLoop(signal?: AbortSignal) { + await Promise.race([ + Promise.all([ + this.doc.connection.waitForConnected(signal), + this.indexer.connection.waitForConnected(signal), + this.indexerSync.connection.waitForConnected(signal), + ]), + new Promise((_, reject) => { + setTimeout(() => { + reject(new Error('Connect to remote timeout')); + }, 1000 * 30); + }), + new Promise((_, reject) => { + signal?.addEventListener('abort', reason => { + reject(reason); + }); + }), + ]); + + this.status.errorMessage = null; + this.status.statusUpdatedSubject$.next(true); + + console.log('indexer sync start'); + + const unsubscribe = this.doc.subscribeDocUpdate(update => { + if (!this.status.rootDocReady) { + return; + } + if (update.docId === this.rootDocId) { + applyUpdate(this.status.rootDoc, update.bin); + + const allDocs = this.getAllDocsFromRootDoc(); + + for (const [docId, { title }] of allDocs) { + const existingDoc = this.status.docsInRootDoc.get(docId); + if (!existingDoc) { + this.status.scheduleJob(docId); + this.status.docsInRootDoc.set(docId, { title }); + this.status.statusUpdatedSubject$.next(docId); + } else { + if (existingDoc.title !== title) { + this.status.docsInRootDoc.set(docId, { title }); + this.status.statusUpdatedSubject$.next(docId); + } + } + } + + for (const docId of this.status.docsInRootDoc.keys()) { + if (!allDocs.has(docId)) { + this.status.docsInRootDoc.delete(docId); + this.status.statusUpdatedSubject$.next(docId); + } + } + this.status.scheduleJob(this.rootDocId); + } else { + const docId = update.docId; + const existingDoc = this.status.docsInRootDoc.get(docId); + if (existingDoc) { + this.status.scheduleJob(docId); + } + } + }); + + try { + const rootDocBin = (await this.doc.getDoc(this.rootDocId))?.bin; + if (rootDocBin) { + applyUpdate(this.status.rootDoc, rootDocBin); + } + + this.status.scheduleJob(this.rootDocId); + + const allDocs = this.getAllDocsFromRootDoc(); + this.status.docsInRootDoc = allDocs; + this.status.statusUpdatedSubject$.next(true); + + for (const docId of allDocs.keys()) { + this.status.scheduleJob(docId); + } + + this.status.rootDocReady = true; + this.status.statusUpdatedSubject$.next(true); + + const allIndexedDocs = await this.getAllDocsFromIndexer(); + this.status.docsInIndexer = allIndexedDocs; + this.status.statusUpdatedSubject$.next(true); + + while (true) { + throwIfAborted(signal); + + const docId = await this.status.acceptJob(signal); + + if (docId === this.rootDocId) { + // #region crawl root doc + for (const [docId, { title }] of this.status.docsInRootDoc) { + const existingDoc = this.status.docsInIndexer.get(docId); + if (existingDoc) { + if (existingDoc.title !== title) { + // need update + await this.indexer.update( + 'doc', + IndexerDocument.from(docId, { + docId, + title, + }) + ); + this.status.docsInIndexer.set(docId, { title }); + this.status.statusUpdatedSubject$.next(docId); + } + } else { + // need add + await this.indexer.insert( + 'doc', + IndexerDocument.from(docId, { + docId, + title, + }) + ); + this.status.docsInIndexer.set(docId, { title 
}); + this.status.statusUpdatedSubject$.next(docId); + } + } + + for (const docId of this.status.docsInIndexer.keys()) { + if (!this.status.docsInRootDoc.has(docId)) { + await this.indexer.delete('doc', docId); + await this.indexer.deleteByQuery('block', { + type: 'match', + field: 'docId', + match: docId, + }); + await this.indexerSync.clearDocIndexedClock(docId); + this.status.docsInIndexer.delete(docId); + this.status.statusUpdatedSubject$.next(docId); + } + } + await this.indexer.refresh('block'); + await this.indexer.refresh('doc'); + // #endregion + } else { + // #region crawl doc + const existingDoc = this.status.docsInIndexer.get(docId); + if (!existingDoc) { + // doc is deleted, just skip + continue; + } + + const docClock = await this.doc.getDocTimestamp(docId); + if (!docClock) { + // doc is deleted, just skip + continue; + } + + const docIndexedClock = + await this.indexerSync.getDocIndexedClock(docId); + if ( + docIndexedClock && + docIndexedClock.timestamp.getTime() === docClock.timestamp.getTime() + ) { + // doc is already indexed, just skip + continue; + } + + const docBin = await this.doc.getDoc(docId); + if (!docBin) { + // doc is deleted, just skip + continue; + } + const docYDoc = new YDoc({ guid: docId }); + applyUpdate(docYDoc, docBin.bin); + + let blocks: IndexerDocument<'block'>[] = []; + let preview: string | undefined; + + try { + const result = await crawlingDocData({ + ydoc: docYDoc, + rootYDoc: this.status.rootDoc, + spaceId: this.status.rootDocId, + docId, + }); + blocks = result.blocks; + preview = result.preview; + } catch (error) { + console.error('error crawling doc', error); + } + + await this.indexer.deleteByQuery('block', { + type: 'match', + field: 'docId', + match: docId, + }); + + for (const block of blocks) { + await this.indexer.insert('block', block); + } + + await this.indexer.refresh('block'); + + if (preview) { + await this.indexer.update( + 'doc', + IndexerDocument.from(docId, { + summary: preview, + }) + ); + await this.indexer.refresh('doc'); + } + + await this.indexerSync.setDocIndexedClock({ + docId, + timestamp: docClock.timestamp, + }); + // #endregion + } + + this.status.completeJob(); + } + } finally { + unsubscribe(); + } + } + + /** + * Get all docs from the root doc, without deleted docs + */ + private getAllDocsFromRootDoc() { + const docs = this.status.rootDoc.getMap('meta').get('pages') as + | YArray> + | undefined; + const availableDocs = new Map(); + + if (docs) { + for (const page of docs) { + const docId = page.get('id'); + + if (typeof docId !== 'string') { + continue; + } + + const inTrash = page.get('trash') ?? false; + const title = page.get('title'); + + if (!inTrash) { + availableDocs.set(docId, { title }); + } + } + } + + return availableDocs; + } + + private async getAllDocsFromIndexer() { + const docs = await this.indexer.search( + 'doc', + { + type: 'all', + }, + { + pagination: { + limit: Infinity, + }, + fields: ['docId', 'title'], + } + ); + + return new Map( + docs.nodes.map(node => { + const title = node.fields.title; + return [ + node.id, + { + title: typeof title === 'string' ? 
title : title.at(0), + }, + ]; + }) + ); + } +} + +class IndexerSyncStatus { + prioritySettings = new Map(); + jobs = new AsyncPriorityQueue(); + rootDoc = new YDoc({ guid: this.rootDocId }); + rootDocReady = false; + docsInIndexer = new Map(); + docsInRootDoc = new Map(); + currentJob: string | null = null; + errorMessage: string | null = null; + statusUpdatedSubject$ = new Subject(); + + state$ = new Observable(subscribe => { + const next = () => { + subscribe.next({ + indexing: this.jobs.length() + (this.currentJob ? 1 : 0), + total: this.docsInRootDoc.size + 1, + errorMessage: this.errorMessage, + completed: this.rootDocReady && this.jobs.length() === 0, + }); + }; + next(); + const dispose = this.statusUpdatedSubject$.subscribe(() => { + next(); + }); + return () => { + dispose.unsubscribe(); + }; + }).pipe( + share({ + connector: () => new ReplaySubject(1), + }) + ); + + docState$(docId: string) { + return new Observable(subscribe => { + const next = () => { + subscribe.next({ + indexing: this.jobs.has(docId), + completed: this.docsInIndexer.has(docId) && !this.jobs.has(docId), + }); + }; + next(); + const dispose = this.statusUpdatedSubject$.subscribe(updatedDocId => { + if (updatedDocId === docId || updatedDocId === true) { + next(); + } + }); + return () => { + dispose.unsubscribe(); + }; + }).pipe( + share({ + connector: () => new ReplaySubject(1), + }) + ); + } + + constructor(readonly rootDocId: string) { + this.prioritySettings.set(this.rootDocId, Infinity); + } + + scheduleJob(docId: string) { + const priority = this.prioritySettings.get(docId) ?? 0; + this.jobs.push(docId, priority); + this.statusUpdatedSubject$.next(docId); + } + + async acceptJob(abort?: AbortSignal) { + const job = await this.jobs.asyncPop(abort); + this.currentJob = job; + this.statusUpdatedSubject$.next(job); + return job; + } + + completeJob() { + const job = this.currentJob; + this.currentJob = null; + this.statusUpdatedSubject$.next(job ?? true); + } + + addPriority(id: string, priority: number) { + const oldPriority = this.prioritySettings.get(id) ?? 0; + this.prioritySettings.set(id, priority); + this.jobs.setPriority(id, oldPriority + priority); + + return () => { + const currentPriority = this.prioritySettings.get(id) ?? 
0; + this.prioritySettings.set(id, currentPriority - priority); + this.jobs.setPriority(id, currentPriority - priority); + }; + } + + reset() { + // reset all state, except prioritySettings + this.jobs.clear(); + this.docsInRootDoc.clear(); + this.docsInIndexer.clear(); + this.rootDoc = new YDoc(); + this.rootDocReady = false; + this.currentJob = null; + this.statusUpdatedSubject$.next(true); + } +} diff --git a/packages/common/nbstore/src/utils/priority-queue.ts b/packages/common/nbstore/src/utils/priority-queue.ts index 3af09acc29..9cee608840 100644 --- a/packages/common/nbstore/src/utils/priority-queue.ts +++ b/packages/common/nbstore/src/utils/priority-queue.ts @@ -52,6 +52,10 @@ export class PriorityQueue { return removed; } + has(id: string) { + return this.priorityMap.has(id); + } + clear() { this.tree.clear(); this.priorityMap.clear(); @@ -64,6 +68,6 @@ export class PriorityQueue { } get length() { - return this.tree.count; + return this.tree.count.bind(this.tree); } } diff --git a/packages/common/nbstore/src/utils/take-until-abort.ts b/packages/common/nbstore/src/utils/take-until-abort.ts new file mode 100644 index 0000000000..ce1488a65f --- /dev/null +++ b/packages/common/nbstore/src/utils/take-until-abort.ts @@ -0,0 +1,42 @@ +import { Observable, type OperatorFunction } from 'rxjs'; + +/** + * Creates an operator that takes values from the source Observable until the given AbortSignal aborts. + * When the signal aborts, the Observable completes. + * + * @param signal - The AbortSignal that will trigger completion when aborted + * @returns An operator function that takes values until the signal aborts + */ +export function takeUntilAbort( + signal?: AbortSignal +): OperatorFunction { + return (source$: Observable) => { + return new Observable(subscriber => { + if (signal?.aborted) { + subscriber.error(signal.reason); + return; + } + + const abortHandler = () => { + subscriber.error(signal?.reason); + }; + + if (signal) { + signal.addEventListener('abort', abortHandler); + } + + const subscription = source$.subscribe({ + next: value => subscriber.next(value), + error: err => subscriber.error(err), + complete: () => subscriber.complete(), + }); + + return () => { + if (signal) { + signal.removeEventListener('abort', abortHandler); + } + subscription.unsubscribe(); + }; + }); + }; +} diff --git a/packages/common/nbstore/src/worker/client.ts b/packages/common/nbstore/src/worker/client.ts index 6852c8c1dd..5ded685d22 100644 --- a/packages/common/nbstore/src/worker/client.ts +++ b/packages/common/nbstore/src/worker/client.ts @@ -1,20 +1,35 @@ import { OpClient, transfer } from '@toeverything/infra/op'; +import type { Observable } from 'rxjs'; import { v4 as uuid } from 'uuid'; import { DummyConnection } from '../connection'; -import { AwarenessFrontend, BlobFrontend, DocFrontend } from '../frontend'; import { + AwarenessFrontend, + BlobFrontend, + DocFrontend, + IndexerFrontend, +} from '../frontend'; +import { + type AggregateOptions, + type AggregateResult, type AwarenessRecord, type BlobRecord, type BlobStorage, type DocRecord, type DocStorage, type DocUpdate, + type IndexerDocument, + type IndexerSchema, + type IndexerStorage, type ListedBlobRecord, + type Query, + type SearchOptions, + type SearchResult, } from '../storage'; import type { AwarenessSync } from '../sync/awareness'; import type { BlobSync } from '../sync/blob'; import type { DocSync } from '../sync/doc'; +import type { IndexerSync } from '../sync/indexer'; import type { StoreInitOptions, WorkerManagerOps, WorkerOps } 
from './ops'; export type { StoreInitOptions as WorkerInitOptions } from './ops'; @@ -85,6 +100,12 @@ export class StoreClient { this.docFrontend = new DocFrontend(this.docStorage, this.docSync); this.blobFrontend = new BlobFrontend(this.blobStorage, this.blobSync); this.awarenessFrontend = new AwarenessFrontend(this.awarenessSync); + this.indexerStorage = new WorkerIndexerStorage(this.client); + this.indexerSync = new WorkerIndexerSync(this.client); + this.indexerFrontend = new IndexerFrontend( + this.indexerStorage, + this.indexerSync + ); } private readonly docStorage: WorkerDocStorage; @@ -92,14 +113,18 @@ export class StoreClient { private readonly docSync: WorkerDocSync; private readonly blobSync: WorkerBlobSync; private readonly awarenessSync: WorkerAwarenessSync; + private readonly indexerStorage: WorkerIndexerStorage; + private readonly indexerSync: WorkerIndexerSync; readonly docFrontend: DocFrontend; readonly blobFrontend: BlobFrontend; readonly awarenessFrontend: AwarenessFrontend; + readonly indexerFrontend: IndexerFrontend; } class WorkerDocStorage implements DocStorage { constructor(private readonly client: OpClient) {} + spaceId = ''; readonly storageType = 'doc'; readonly isReadonly = false; @@ -316,3 +341,146 @@ class WorkerAwarenessSync implements AwarenessSync { }; } } + +class WorkerIndexerStorage implements IndexerStorage { + constructor(private readonly client: OpClient) {} + readonly storageType = 'indexer'; + readonly isReadonly = true; + connection = new DummyConnection(); + + search>( + table: T, + query: Query, + options?: O + ): Promise> { + return this.client.call('indexerStorage.search', { table, query, options }); + } + aggregate>( + table: T, + query: Query, + field: keyof IndexerSchema[T], + options?: O + ): Promise> { + return this.client.call('indexerStorage.aggregate', { + table, + query, + field: field as string, + options, + }); + } + search$>( + table: T, + query: Query, + options?: O + ): Observable> { + return this.client.ob$('indexerStorage.subscribeSearch', { + table, + query, + options, + }); + } + aggregate$< + T extends keyof IndexerSchema, + const O extends AggregateOptions, + >( + table: T, + query: Query, + field: keyof IndexerSchema[T], + options?: O + ): Observable> { + return this.client.ob$('indexerStorage.subscribeAggregate', { + table, + query, + field: field as string, + options, + }); + } + deleteByQuery( + _table: T, + _query: Query + ): Promise { + throw new Error('Method not implemented.'); + } + insert( + _table: T, + _document: IndexerDocument + ): Promise { + throw new Error('Method not implemented.'); + } + delete(_table: T, _id: string): Promise { + throw new Error('Method not implemented.'); + } + update( + _table: T, + _document: IndexerDocument + ): Promise { + throw new Error('Method not implemented.'); + } + refresh(_table: T): Promise { + throw new Error('Method not implemented.'); + } +} + +class WorkerIndexerSync implements IndexerSync { + constructor(private readonly client: OpClient) {} + waitForCompleted(signal?: AbortSignal): Promise { + return new Promise((resolve, reject) => { + const abortListener = () => { + reject(signal?.reason); + subscription.unsubscribe(); + }; + + signal?.addEventListener('abort', abortListener); + + const subscription = this.client + .ob$('indexerSync.waitForCompleted') + .subscribe({ + complete() { + signal?.removeEventListener('abort', abortListener); + resolve(); + }, + error(err) { + signal?.removeEventListener('abort', abortListener); + reject(err); + }, + }); + }); + } + 
+
+class WorkerIndexerSync implements IndexerSync {
+  constructor(private readonly client: OpClient<WorkerOps>) {}
+  waitForCompleted(signal?: AbortSignal): Promise<void> {
+    return new Promise<void>((resolve, reject) => {
+      const abortListener = () => {
+        reject(signal?.reason);
+        subscription.unsubscribe();
+      };
+
+      signal?.addEventListener('abort', abortListener);
+
+      const subscription = this.client
+        .ob$('indexerSync.waitForCompleted')
+        .subscribe({
+          complete() {
+            signal?.removeEventListener('abort', abortListener);
+            resolve();
+          },
+          error(err) {
+            signal?.removeEventListener('abort', abortListener);
+            reject(err);
+          },
+        });
+    });
+  }
+
+  waitForDocCompleted(docId: string, signal?: AbortSignal): Promise<void> {
+    return new Promise<void>((resolve, reject) => {
+      const abortListener = () => {
+        reject(signal?.reason);
+        subscription.unsubscribe();
+      };
+
+      signal?.addEventListener('abort', abortListener);
+
+      const subscription = this.client
+        .ob$('indexerSync.waitForDocCompleted', docId)
+        .subscribe({
+          complete() {
+            signal?.removeEventListener('abort', abortListener);
+            resolve();
+          },
+          error(err) {
+            signal?.removeEventListener('abort', abortListener);
+            reject(err);
+          },
+        });
+    });
+  }
+  get state$() {
+    return this.client.ob$('indexerSync.state');
+  }
+  docState$(docId: string) {
+    return this.client.ob$('indexerSync.docState', docId);
+  }
+  addPriority(docId: string, priority: number) {
+    const subscription = this.client
+      .ob$('indexerSync.addPriority', { docId, priority })
+      .subscribe();
+    return () => {
+      subscription.unsubscribe();
+    };
+  }
+}
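These promise wrappers are what the React effects later in this patch (explorer nodes, page previews) build on. A condensed sketch of that pattern, with hypothetical variable wiring:

    const abortController = new AbortController();

    // Bump the doc to the front of the crawl queue; keep the undo handle.
    const undoPriority = indexerSync.addPriority(docId, 100);

    indexerSync
      .waitForDocCompleted(docId, abortController.signal)
      .then(() => console.log('doc indexed:', docId))
      .catch(err => {
        if (err !== MANUALLY_STOP) console.error(err);
      });

    // On teardown: drop the priority boost and cancel the wait.
    undoPriority();
    abortController.abort(MANUALLY_STOP);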
diff --git a/packages/common/nbstore/src/worker/consumer.ts b/packages/common/nbstore/src/worker/consumer.ts
index 315c1d0297..ebe8ea8fc4 100644
--- a/packages/common/nbstore/src/worker/consumer.ts
+++ b/packages/common/nbstore/src/worker/consumer.ts
@@ -1,4 +1,3 @@
-import { MANUALLY_STOP } from '@toeverything/infra';
 import { OpConsumer } from '@toeverything/infra/op';
 import { Observable } from 'rxjs';
 
@@ -7,6 +6,7 @@ import { SpaceStorage } from '../storage';
 import type { AwarenessRecord } from '../storage/awareness';
 import { Sync } from '../sync';
 import type { PeerStorageOptions } from '../sync/types';
+import { MANUALLY_STOP } from '../utils/throw-if-aborted';
 import type { StoreInitOptions, WorkerManagerOps, WorkerOps } from './ops';
 
 export type { WorkerManagerOps };
@@ -57,6 +57,14 @@ class StoreConsumer {
     return this.ensureSync.awareness;
   }
 
+  get indexerStorage() {
+    return this.ensureLocal.get('indexer');
+  }
+
+  get indexerSync() {
+    return this.ensureSync.indexer;
+  }
+
   constructor(
     private readonly availableStorageImplementations: StorageConstructor[],
     init: StoreInitOptions
@@ -262,6 +270,48 @@ class StoreConsumer {
         }),
       'awarenessSync.collect': ({ collectId, awareness }) =>
         collectJobs.get(collectId)?.(awareness),
+      'indexerStorage.aggregate': ({ table, query, field, options }) =>
+        this.indexerStorage.aggregate(table, query, field, options),
+      'indexerStorage.search': ({ table, query, options }) =>
+        this.indexerStorage.search(table, query, options),
+      'indexerStorage.subscribeSearch': ({ table, query, options }) =>
+        this.indexerStorage.search$(table, query, options),
+      'indexerStorage.subscribeAggregate': ({ table, query, field, options }) =>
+        this.indexerStorage.aggregate$(table, query, field, options),
+      'indexerSync.state': () => this.indexerSync.state$,
+      'indexerSync.docState': (docId: string) =>
+        this.indexerSync.docState$(docId),
+      'indexerSync.addPriority': ({ docId, priority }) =>
+        new Observable(() => {
+          const undo = this.indexerSync.addPriority(docId, priority);
+          return () => undo();
+        }),
+      'indexerSync.waitForCompleted': () =>
+        new Observable<void>(subscriber => {
+          this.indexerSync
+            .waitForCompleted()
+            .then(() => {
+              subscriber.next();
+              subscriber.complete();
+            })
+            .catch(error => {
+              subscriber.error(error);
+            });
+        }),
+      'indexerSync.waitForDocCompleted': (docId: string) =>
+        new Observable<void>(subscriber => {
+          const abortController = new AbortController();
+          this.indexerSync
+            .waitForDocCompleted(docId, abortController.signal)
+            .then(() => {
+              subscriber.next();
+              subscriber.complete();
+            })
+            .catch(error => {
+              subscriber.error(error);
+            });
+          return () => abortController.abort(MANUALLY_STOP);
+        }),
     });
   }
 }
diff --git a/packages/common/nbstore/src/worker/ops.ts b/packages/common/nbstore/src/worker/ops.ts
index 019e58ec49..1f5ebb4da8 100644
--- a/packages/common/nbstore/src/worker/ops.ts
+++ b/packages/common/nbstore/src/worker/ops.ts
@@ -1,5 +1,7 @@
 import type { AvailableStorageImplementations } from '../impls';
 import type {
+  AggregateOptions,
+  AggregateResult,
   BlobRecord,
   DocClock,
   DocClocks,
@@ -7,11 +9,15 @@ import type {
   DocRecord,
   DocUpdate,
   ListedBlobRecord,
+  Query,
+  SearchOptions,
+  SearchResult,
   StorageType,
 } from '../storage';
 import type { AwarenessRecord } from '../storage/awareness';
 import type { BlobSyncBlobState, BlobSyncState } from '../sync/blob';
 import type { DocSyncDocState, DocSyncState } from '../sync/doc';
+import type { IndexerDocSyncState, IndexerSyncState } from '../sync/indexer';
 
 type StorageInitOptions = Values<{
   [key in keyof AvailableStorageImplementations]: {
@@ -61,6 +67,35 @@ interface GroupedWorkerOps {
     collect: [{ collectId: string; awareness: AwarenessRecord }, void];
   };
 
+  indexerStorage: {
+    search: [
+      { table: string; query: Query<any>; options?: SearchOptions<any> },
+      SearchResult<any, any>,
+    ];
+    aggregate: [
+      {
+        table: string;
+        query: Query<any>;
+        field: string;
+        options?: AggregateOptions<any>;
+      },
+      AggregateResult<any, any>,
+    ];
+    subscribeSearch: [
+      { table: string; query: Query<any>; options?: SearchOptions<any> },
+      SearchResult<any, any>,
+    ];
+    subscribeAggregate: [
+      {
+        table: string;
+        query: Query<any>;
+        field: string;
+        options?: AggregateOptions<any>;
+      },
+      AggregateResult<any, any>,
+    ];
+  };
+
   docSync: {
     state: [void, DocSyncState];
     docState: [string, DocSyncDocState];
@@ -91,6 +126,14 @@ interface GroupedWorkerOps {
     ];
     collect: [{ collectId: string; awareness: AwarenessRecord }, void];
   };
+
+  indexerSync: {
+    state: [void, IndexerSyncState];
+    docState: [string, IndexerDocSyncState];
+    addPriority: [{ docId: string; priority: number }, boolean];
+    waitForCompleted: [void, void];
+    waitForDocCompleted: [string, void];
+  };
 }
 
 type Values<T> = T extends { [k in keyof T]: any } ? T[keyof T] : never;
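Each entry in a group is an `[input, output]` tuple: one-shot ops resolve once via `call`, while the state and `subscribe*` ops are consumed as observables via `ob$`. A sketch against the new `indexerSync` group (the field names read off the emissions are assumptions based on the state defaults used later in this patch):

    // Streaming op: each emission is an IndexerSyncState snapshot.
    const stateSub = client
      .ob$('indexerSync.state')
      .subscribe(state => console.log('docs still indexing:', state.indexing));

    // Per-doc stream: emissions are IndexerDocSyncState values.
    const docSub = client
      .ob$('indexerSync.docState', 'doc-id-123')
      .subscribe(docState => console.log('done?', docState.completed));

    stateSub.unsubscribe();
    docSub.unsubscribe();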
diff --git a/packages/common/nbstore/tsconfig.json b/packages/common/nbstore/tsconfig.json
index 67c938fb62..a03bd33485 100644
--- a/packages/common/nbstore/tsconfig.json
+++ b/packages/common/nbstore/tsconfig.json
@@ -9,6 +9,7 @@
   "references": [
     { "path": "../infra" },
     { "path": "../error" },
-    { "path": "../graphql" }
+    { "path": "../graphql" },
+    { "path": "../../../blocksuite/affine/all" }
   ]
 }
diff --git a/packages/frontend/apps/electron-renderer/src/background-worker/index.ts b/packages/frontend/apps/electron-renderer/src/background-worker/index.ts
index 6df4d0e61c..6a1a2d3f0e 100644
--- a/packages/frontend/apps/electron-renderer/src/background-worker/index.ts
+++ b/packages/frontend/apps/electron-renderer/src/background-worker/index.ts
@@ -3,6 +3,7 @@ import '@affine/core/bootstrap/electron';
 import { apis } from '@affine/electron-api';
 import { broadcastChannelStorages } from '@affine/nbstore/broadcast-channel';
 import { cloudStorages } from '@affine/nbstore/cloud';
+import { idbStoragesIndexerOnly } from '@affine/nbstore/idb';
 import { bindNativeDBApis, sqliteStorages } from '@affine/nbstore/sqlite';
 import {
   bindNativeDBV1Apis,
@@ -20,6 +21,7 @@ bindNativeDBApis(apis!.nbstore);
 bindNativeDBV1Apis(apis!.db);
 
 const storeManager = new StoreManagerConsumer([
+  ...idbStoragesIndexerOnly,
   ...sqliteStorages,
   ...sqliteV1Storages,
   ...broadcastChannelStorages,
diff --git a/packages/frontend/apps/ios/src/worker.ts b/packages/frontend/apps/ios/src/worker.ts
index f1be461b68..bb93fb9519 100644
--- a/packages/frontend/apps/ios/src/worker.ts
+++ b/packages/frontend/apps/ios/src/worker.ts
@@ -5,6 +5,7 @@ import {
   cloudStorages,
   configureSocketAuthMethod,
 } from '@affine/nbstore/cloud';
+import { idbStoragesIndexerOnly } from '@affine/nbstore/idb';
 import {
   bindNativeDBApis,
   type NativeDBApis,
@@ -61,6 +62,7 @@ const consumer = new OpConsumer(
 );
 
 const storeManager = new StoreManagerConsumer([
+  ...idbStoragesIndexerOnly,
   ...sqliteStorages,
   ...broadcastChannelStorages,
   ...cloudStorages,
diff --git a/packages/frontend/core/src/bootstrap/polyfill/set-difference.ts b/packages/frontend/core/src/bootstrap/polyfill/set-difference.ts
new file mode 100644
index 0000000000..cd458239ef
--- /dev/null
+++ b/packages/frontend/core/src/bootstrap/polyfill/set-difference.ts
@@ -0,0 +1 @@
+import 'core-js/es/set/difference.js';
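This polyfill presumably backs the crawler's set arithmetic between docs present in the root doc and docs already indexed (the `docsInRootDoc`/`docsInIndexer` sets seen in the crawler's `reset()` earlier in this patch). What it enables, with illustrative values:

    import 'core-js/es/set/difference.js';

    const docsInRootDoc = new Set(['a', 'b', 'c']);
    const docsInIndexer = new Set(['b', 'c', 'd']);

    // Docs that still need indexing, and stale index entries to drop.
    const toIndex = docsInRootDoc.difference(docsInIndexer); // Set { 'a' }
    const toPurge = docsInIndexer.difference(docsInRootDoc); // Set { 'd' }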
diff --git a/packages/frontend/core/src/components/page-list/page-content-preview.tsx b/packages/frontend/core/src/components/page-list/page-content-preview.tsx
index bae1604b66..017fc49244 100644
--- a/packages/frontend/core/src/components/page-list/page-content-preview.tsx
+++ b/packages/frontend/core/src/components/page-list/page-content-preview.tsx
@@ -1,6 +1,6 @@
 import { DocsSearchService } from '@affine/core/modules/docs-search';
 import { LiveData, useLiveData, useService } from '@toeverything/infra';
-import { type ReactNode, useMemo } from 'react';
+import { type ReactNode, useEffect, useMemo } from 'react';
 
 interface PagePreviewProps {
   pageId: string;
@@ -21,6 +21,11 @@ const PagePreviewInner = ({
     )
   );
 
+  useEffect(() => {
+    const undo = docSummary.indexer.addPriority(pageId, 100);
+    return undo;
+  }, [docSummary, pageId]);
+
   const res =
     summary === null ? fallback : summary === '' ? emptyFallback : summary;
   return res;
diff --git a/packages/frontend/core/src/mobile/components/explorer/nodes/doc/index.tsx b/packages/frontend/core/src/mobile/components/explorer/nodes/doc/index.tsx
index 5fa4e35d94..b010ef7266 100644
--- a/packages/frontend/core/src/mobile/components/explorer/nodes/doc/index.tsx
+++ b/packages/frontend/core/src/mobile/components/explorer/nodes/doc/index.tsx
@@ -10,6 +10,7 @@ import { GlobalContextService } from '@affine/core/modules/global-context';
 import { useI18n } from '@affine/i18n';
 import {
   LiveData,
+  MANUALLY_STOP,
   useLiveData,
   useService,
   useServices,
@@ -78,19 +79,23 @@ export const ExplorerDocNode = ({
     )
   );
 
-  const indexerLoading = useLiveData(
-    docsSearchService.indexer.status$.map(
-      v => v.remaining === undefined || v.remaining > 0
-    )
-  );
   const [referencesLoading, setReferencesLoading] = useState(true);
   useLayoutEffect(() => {
-    setReferencesLoading(
-      prev =>
-        prev &&
-        indexerLoading /* after loading becomes false, it never becomes true */
-    );
-  }, [indexerLoading]);
+    const abortController = new AbortController();
+    docsSearchService.indexer
+      .waitForDocCompletedWithPriority(docId, 100, abortController.signal)
+      .then(() => {
+        setReferencesLoading(false);
+      })
+      .catch(err => {
+        if (err !== MANUALLY_STOP) {
+          console.error(err);
+        }
+      });
+    return () => {
+      abortController.abort(MANUALLY_STOP);
+    };
+  }, [docId, docsSearchService]);
 
   const workspaceDialogService = useService(WorkspaceDialogService);
   const option = useMemo(
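The next hunk swaps the old direct `blockIndex` access for the new table-first API; the aggregate call shape it relies on, sketched with an illustrative query (the real hunk uses a boolean query, and the `block` schema fields are assumed to match the old schema):

    // Aggregate matching `block` rows by their `blob` field; each bucket key
    // is a blob id referenced by at least one matching block.
    const result = await docsSearchService.indexer.aggregate(
      'block',
      { type: 'match', field: 'flavour', match: 'affine:attachment' },
      'blob'
    );
    const usedBlobIds = result.buckets.map(bucket => bucket.key);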
diff --git a/packages/frontend/core/src/modules/blob-management/entity/unused-blobs.ts b/packages/frontend/core/src/modules/blob-management/entity/unused-blobs.ts
index aadfa8de1d..0849741a21 100644
--- a/packages/frontend/core/src/modules/blob-management/entity/unused-blobs.ts
+++ b/packages/frontend/core/src/modules/blob-management/entity/unused-blobs.ts
@@ -9,7 +9,6 @@ import {
 } from '@toeverything/infra';
 import { fileTypeFromBuffer } from 'file-type';
 import {
-  combineLatest,
   EMPTY,
   filter,
   firstValueFrom,
@@ -88,16 +87,9 @@ export class UnusedBlobs extends Entity {
 
   async getUnusedBlobs(abortSignal?: AbortSignal) {
     // Wait for both sync and indexing to complete
-    const ready$ = combineLatest([
-      this.workspaceService.workspace.engine.doc.state$.pipe(
-        filter(state => state.syncing === 0 && !state.syncRetrying)
-      ),
-      this.docsSearchService.indexer.status$.pipe(
-        filter(
-          status => status.remaining === undefined || status.remaining === 0
-        )
-      ),
-    ]).pipe(map(() => true));
+    const ready$ = this.workspaceService.workspace.engine.doc.state$
+      .pipe(filter(state => state.syncing === 0 && !state.syncRetrying))
+      .pipe(map(() => true));
 
     await firstValueFrom(
       abortSignal
@@ -105,6 +97,8 @@ export class UnusedBlobs extends Entity {
         : ready$
     );
 
+    await this.docsSearchService.indexer.waitForCompleted(abortSignal);
+
     const [blobs, usedBlobs] = await Promise.all([
       this.listBlobs(),
       this.getUsedBlobs(),
@@ -121,7 +115,8 @@ export class UnusedBlobs extends Entity {
   }
 
   private async getUsedBlobs(): Promise<string[]> {
-    const result = await this.docsSearchService.indexer.blockIndex.aggregate(
+    const result = await this.docsSearchService.indexer.aggregate(
+      'block',
       {
         type: 'boolean',
         occur: 'must',
diff --git a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts b/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts deleted file mode 100644 index 86b1e573c2..0000000000 --- a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts +++ /dev/null @@ -1,301 +0,0 @@ -import { DebugLogger } from '@affine/debug'; -import type { Job, JobQueue } from '@toeverything/infra'; -import { - Entity, - IndexedDBIndexStorage, - IndexedDBJobQueue, - JobRunner, - LiveData, -} from '@toeverything/infra'; -import { map } from 'rxjs'; - -import { WorkspaceDBService } from '../../db'; -import type { WorkspaceLocalState, WorkspaceService } from '../../workspace'; -import { blockIndexSchema, docIndexSchema } from '../schema'; -import { createWorker, type IndexerWorker } from '../worker/out-worker'; - -export function isEmptyUpdate(binary: Uint8Array) { - return ( - binary.byteLength === 0 || - (binary.byteLength === 2 && binary[0] === 0 && binary[1] === 0) - ); -} - -const logger = new DebugLogger('crawler'); -const WORKSPACE_DOCS_INDEXER_VERSION_KEY = 'docs-indexer-version'; - -interface IndexerJobPayload { - docId: string; -} - -export class DocsIndexer extends Entity { - /** - * increase this number to re-index all docs - */ - static INDEXER_VERSION = 19; - - private readonly jobQueue: JobQueue<IndexerJobPayload> = - new IndexedDBJobQueue<IndexerJobPayload>( - 'jq:' + this.workspaceService.workspace.id - ); - - private readonly runner = new JobRunner( - this.jobQueue, - (jobs, signal) => this.execJob(jobs, signal), - () => - new Promise<void>(resolve => - requestIdleCallback(() => resolve(), { - timeout: 200, - }) - ) - ); - - private readonly indexStorage = new IndexedDBIndexStorage( - 'idx:' + this.workspaceService.workspace.id - ); - - readonly docIndex = this.indexStorage.getIndex('doc', docIndexSchema); - - readonly blockIndex = this.indexStorage.getIndex('block', blockIndexSchema); - - private readonly workspaceEngine = this.workspaceService.workspace.engine; - - private readonly workspaceId = this.workspaceService.workspace.id; - - private worker: IndexerWorker | null = null; - - readonly status$ = LiveData.from<{ remaining?: number }>( - this.jobQueue.status$.pipe( - map(status => ({ - remaining: status.remaining, - })) - ), - {} - ); - - constructor( - private readonly workspaceService: WorkspaceService, - private readonly workspaceLocalState: WorkspaceLocalState - ) { - super(); - } - - setupListener() { - this.workspaceEngine.doc.storage.connection - .waitForConnected() - .then(() => { - this.disposables.push( - this.workspaceEngine.doc.storage.subscribeDocUpdate(updated => { - if (WorkspaceDBService.isDBDocId(updated.docId)) { - // skip db doc - return; - } - this.jobQueue - .enqueue([ - { - batchKey: updated.docId, - payload: { docId: updated.docId }, - }, - ]) - .catch(err => { - console.error('Error enqueueing job', err); - }); - }) - ); - }) - .catch(err => { - console.error('Error waiting for doc storage connection', err); - }); - } - - async execJob(jobs: Job<IndexerJobPayload>[], signal: AbortSignal) { - if (jobs.length === 0) { - return; - } - - const dbVersion = this.getVersion(); - - if (dbVersion > DocsIndexer.INDEXER_VERSION) { - // stop if db version is higher than self - this.runner.stop(); - throw new Error('Indexer is outdated'); - } - - const isUpgrade = dbVersion < DocsIndexer.INDEXER_VERSION; - - // jobs should have the same storage docId, so we just pick the first one - const docId = jobs[0].payload.docId; - - const worker = await this.ensureWorker(signal); - - const startTime = performance.now(); - logger.debug('Start crawling job for docId:', docId); - - let workerOutput; - - if (docId === this.workspaceId) { - const rootDocBuffer = ( - await this.workspaceEngine.doc.storage.getDoc(this.workspaceId) - )?.bin; - if (!rootDocBuffer) { - return; - } - - const allIndexedDocs = (await 
this.docIndex.getAll()).map(d => d.id); - - workerOutput = await worker.run({ - type: 'rootDoc', - allIndexedDocs, - rootDocBuffer, - reindexAll: isUpgrade, - rootDocId: this.workspaceId, - }); - } else { - const rootDocBuffer = ( - await this.workspaceEngine.doc.storage.getDoc(this.workspaceId) - )?.bin; - - const docBuffer = - (await this.workspaceEngine.doc.storage.getDoc(docId))?.bin ?? - new Uint8Array(0); - - if (!rootDocBuffer) { - return; - } - - workerOutput = await worker.run({ - type: 'doc', - docBuffer, - docId, - rootDocBuffer, - rootDocId: this.workspaceId, - }); - } - - if (workerOutput.deletedDoc || workerOutput.addedDoc) { - if (workerOutput.deletedDoc) { - const docIndexWriter = await this.docIndex.write(); - for (const docId of workerOutput.deletedDoc) { - docIndexWriter.delete(docId); - } - await docIndexWriter.commit(); - const blockIndexWriter = await this.blockIndex.write(); - for (const docId of workerOutput.deletedDoc) { - const oldBlocks = await blockIndexWriter.search( - { - type: 'match', - field: 'docId', - match: docId, - }, - { - pagination: { - limit: Number.MAX_SAFE_INTEGER, - }, - } - ); - for (const block of oldBlocks.nodes) { - blockIndexWriter.delete(block.id); - } - } - await blockIndexWriter.commit(); - } - if (workerOutput.addedDoc) { - const docIndexWriter = await this.docIndex.write(); - for (const { doc } of workerOutput.addedDoc) { - docIndexWriter.put(doc); - } - await docIndexWriter.commit(); - const blockIndexWriter = await this.blockIndex.write(); - for (const { id, blocks } of workerOutput.addedDoc) { - // delete old blocks - const oldBlocks = await blockIndexWriter.search( - { - type: 'match', - field: 'docId', - match: id, - }, - { - pagination: { - limit: Number.MAX_SAFE_INTEGER, - }, - } - ); - for (const block of oldBlocks.nodes) { - blockIndexWriter.delete(block.id); - } - for (const block of blocks) { - blockIndexWriter.insert(block); - } - } - await blockIndexWriter.commit(); - } - } - - if (workerOutput.reindexDoc) { - await this.jobQueue.enqueue( - workerOutput.reindexDoc.map(({ docId }) => ({ - batchKey: docId, - payload: { docId }, - })) - ); - } - - if (isUpgrade) { - this.setVersion(); - } - - const duration = performance.now() - startTime; - logger.debug( - 'Finish crawling job for docId:' + docId + ' in ' + duration + 'ms ' - ); - } - - startCrawling() { - this.runner.start(); - - this.jobQueue - .enqueue([ - { - batchKey: this.workspaceId, - payload: { docId: this.workspaceId }, - }, - ]) - .catch(err => { - console.error('Error enqueueing job', err); - }); - } - - async ensureWorker(signal: AbortSignal): Promise<IndexerWorker> { - if (!this.worker) { - this.worker = await createWorker(signal); - } - return this.worker; - } - - getVersion() { - const version = this.workspaceLocalState.get( - WORKSPACE_DOCS_INDEXER_VERSION_KEY - ); - if (typeof version !== 'number') { - return -1; - } else { - return version; - } - } - - setVersion(version = DocsIndexer.INDEXER_VERSION) { - if (this.getVersion() >= version) { - return; - } - return this.workspaceLocalState.set( - WORKSPACE_DOCS_INDEXER_VERSION_KEY, - version - ); - } - - override dispose(): void { - super.dispose(); - this.runner.stop(); - this.worker?.dispose(); - } -} diff --git a/packages/frontend/core/src/modules/docs-search/index.ts b/packages/frontend/core/src/modules/docs-search/index.ts index 1f9db66e18..10e15e02c7 100644 --- a/packages/frontend/core/src/modules/docs-search/index.ts +++ b/packages/frontend/core/src/modules/docs-search/index.ts @@ -2,17 +2,12 @@ export { 
DocsSearchService } from './services/docs-search'; import { type Framework } from '@toeverything/infra'; -import { - WorkspaceLocalState, - WorkspaceScope, - WorkspaceService, -} from '../workspace'; -import { DocsIndexer } from './entities/docs-indexer'; +import { DocsService } from '../doc'; +import { WorkspaceScope, WorkspaceService } from '../workspace'; import { DocsSearchService } from './services/docs-search'; export function configureDocsSearchModule(framework: Framework) { framework .scope(WorkspaceScope) - .service(DocsSearchService, [WorkspaceService]) - .entity(DocsIndexer, [WorkspaceService, WorkspaceLocalState]); + .service(DocsSearchService, [WorkspaceService, DocsService]); } diff --git a/packages/frontend/core/src/modules/docs-search/schema.ts b/packages/frontend/core/src/modules/docs-search/schema.ts deleted file mode 100644 index d50d52b638..0000000000 --- a/packages/frontend/core/src/modules/docs-search/schema.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { defineSchema } from '@toeverything/infra'; - -export const docIndexSchema = defineSchema({ - docId: 'String', - title: 'FullText', - // summary of the doc, used for preview - summary: { type: 'String', index: false }, - journal: 'String', -}); - -export type DocIndexSchema = typeof docIndexSchema; - -export const blockIndexSchema = defineSchema({ - docId: 'String', - blockId: 'String', - content: 'FullText', - flavour: 'String', - blob: 'String', - // reference doc id - // ['xxx','yyy'] - refDocId: 'String', - // reference info, used for backlink to specific block - // [{"docId":"xxx","mode":"page","blockIds":["gt5Yfq1maYvgNgpi13rIq"]},{"docId":"yyy","mode":"edgeless","blockIds":["k5prpOlDF-9CzfatmO0W7"]}] - ref: { type: 'String', index: false }, - // parent block flavour - parentFlavour: 'String', - // parent block id - parentBlockId: 'String', - // additional info - // { "databaseName": "xxx", "displayMode": "page/edgeless", "noteBlockId": "xxx" } - additional: { type: 'String', index: false }, - markdownPreview: { type: 'String', index: false }, -}); - -export type BlockIndexSchema = typeof blockIndexSchema; diff --git a/packages/frontend/core/src/modules/docs-search/services/docs-search.ts b/packages/frontend/core/src/modules/docs-search/services/docs-search.ts index 0683074cfd..1cf6e6385d 100644 --- a/packages/frontend/core/src/modules/docs-search/services/docs-search.ts +++ b/packages/frontend/core/src/modules/docs-search/services/docs-search.ts @@ -1,31 +1,31 @@ import { toURLSearchParams } from '@affine/core/modules/navigation'; +import type { IndexerSyncState } from '@affine/nbstore'; import type { ReferenceParams } from '@blocksuite/affine/model'; -import { fromPromise, OnEvent, Service } from '@toeverything/infra'; +import { fromPromise, LiveData, Service } from '@toeverything/infra'; import { isEmpty, omit } from 'lodash-es'; import { map, type Observable, of, switchMap } from 'rxjs'; import { z } from 'zod'; +import type { DocsService } from '../../doc/services/docs'; import type { WorkspaceService } from '../../workspace'; -import { WorkspaceEngineBeforeStart } from '../../workspace'; -import { DocsIndexer } from '../entities/docs-indexer'; -@OnEvent(WorkspaceEngineBeforeStart, s => s.handleWorkspaceEngineBeforeStart) export class DocsSearchService extends Service { - readonly indexer = this.framework.createEntity(DocsIndexer); - - constructor(private readonly workspaceService: WorkspaceService) { + constructor( + private readonly workspaceService: WorkspaceService, + private readonly docsService: 
DocsService + ) { super(); } - handleWorkspaceEngineBeforeStart() { - // skip if in shared mode - if (this.workspaceService.workspace.openOptions.isSharedMode) { - return; - } - this.indexer.setupListener(); - this.indexer.startCrawling(); + get indexer() { + return this.workspaceService.workspace.engine.indexer; } + readonly indexerState$ = LiveData.from(this.indexer.state$, { + indexing: 0, + errorMessage: null, + } as IndexerSyncState); + search$(query: string): Observable< { docId: string; @@ -35,8 +35,9 @@ export class DocsSearchService extends Service { blockContent?: string; }[] > { - return this.indexer.blockIndex + return this.indexer .aggregate$( + 'block', { type: 'boolean', occur: 'must', @@ -89,47 +90,40 @@ export class DocsSearchService extends Service { } ) .pipe( - switchMap(({ buckets }) => { - return fromPromise(async () => { - const docData = await this.indexer.docIndex.getAll( - buckets.map(bucket => bucket.key) - ); + map(({ buckets }) => { + const result = []; - const result = []; - - for (const bucket of buckets) { - const firstMatchFlavour = bucket.hits.nodes[0]?.fields.flavour; - if (firstMatchFlavour === 'affine:page') { - // is title match - const blockContent = - bucket.hits.nodes[1]?.highlights.content[0]; // try to get block content - result.push({ - docId: bucket.key, - title: bucket.hits.nodes[0].highlights.content[0], - score: bucket.score, - blockContent, - }); - } else { - const title = - docData.find(doc => doc.id === bucket.key)?.get('title') ?? - ''; - const matchedBlockId = bucket.hits.nodes[0]?.fields.blockId; - // is block match - result.push({ - docId: bucket.key, - title: typeof title === 'string' ? title : title[0], - blockId: - typeof matchedBlockId === 'string' - ? matchedBlockId - : matchedBlockId[0], - score: bucket.score, - blockContent: bucket.hits.nodes[0]?.highlights.content[0], - }); - } + for (const bucket of buckets) { + const firstMatchFlavour = bucket.hits.nodes[0]?.fields.flavour; + if (firstMatchFlavour === 'affine:page') { + // is title match + const blockContent = bucket.hits.nodes[1]?.highlights.content[0]; // try to get block content + result.push({ + docId: bucket.key, + title: bucket.hits.nodes[0].highlights.content[0], + score: bucket.score, + blockContent, + }); + } else { + const title = + this.docsService.list.doc$(bucket.key).value?.title$.value ?? + ''; + const matchedBlockId = bucket.hits.nodes[0]?.fields.blockId; + // is block match + result.push({ + docId: bucket.key, + title: title, + blockId: + typeof matchedBlockId === 'string' + ? matchedBlockId + : matchedBlockId[0], + score: bucket.score, + blockContent: bucket.hits.nodes[0]?.highlights.content[0], + }); } + } - return result; - }); + return result; }) ); } @@ -140,8 +134,9 @@ export class DocsSearchService extends Service { return of([]); } - return this.indexer.blockIndex + return this.indexer .search$( + 'block', { type: 'boolean', occur: 'must', @@ -185,18 +180,12 @@ export class DocsSearchService extends Service { ).values() ); - const docData = await this.indexer.docIndex.getAll( - Array.from(new Set(refs.map(ref => ref.docId))) - ); - return refs .flatMap(ref => { - const doc = docData.find(doc => doc.id === ref.docId); + const doc = this.docsService.list.doc$(ref.docId).value; if (!doc) return null; - const titles = doc.get('title'); - const title = - (Array.isArray(titles) ? titles[0] : titles) ?? 
''; + const title = doc.title$.value; const params = omit(ref, ['docId']); return { @@ -214,8 +203,9 @@ export class DocsSearchService extends Service { } watchRefsTo(docId: string) { - return this.indexer.blockIndex + return this.indexer .aggregate$( + 'block', { type: 'boolean', occur: 'must', @@ -262,13 +252,10 @@ export class DocsSearchService extends Service { .pipe( switchMap(({ buckets }) => { return fromPromise(async () => { - const docData = await this.indexer.docIndex.getAll( - buckets.map(bucket => bucket.key) - ); - return buckets.flatMap(bucket => { const title = - docData.find(doc => doc.id === bucket.key)?.get('title') ?? ''; + this.docsService.list.doc$(bucket.key).value?.title$.value ?? + ''; return bucket.hits.nodes.map(node => { const blockId = node.fields.blockId ?? ''; @@ -297,7 +284,7 @@ export class DocsSearchService extends Service { return { docId: bucket.key, blockId: typeof blockId === 'string' ? blockId : blockId[0], - title: typeof title === 'string' ? title : title[0], + title: title, markdownPreview: typeof markdownPreview === 'string' ? markdownPreview @@ -330,8 +317,9 @@ export class DocsSearchService extends Service { const DatabaseAdditionalSchema = z.object({ databaseName: z.string().optional(), }); - return this.indexer.blockIndex + return this.indexer .search$( + 'block', { type: 'boolean', occur: 'must', @@ -397,8 +385,9 @@ export class DocsSearchService extends Service { } watchDocSummary(docId: string) { - return this.indexer.docIndex + return this.indexer .search$( + 'doc', { type: 'match', field: 'docId', @@ -422,8 +411,4 @@ export class DocsSearchService extends Service { }) ); } - - override dispose(): void { - this.indexer.dispose(); - } } diff --git a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts b/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts deleted file mode 100644 index b265ec9ede..0000000000 --- a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts +++ /dev/null @@ -1,1037 +0,0 @@ -import { defaultBlockMarkdownAdapterMatchers } from '@blocksuite/affine/adapters'; -import { Container } from '@blocksuite/affine/global/di'; -import { - InlineDeltaToMarkdownAdapterExtensions, - MarkdownInlineToDeltaAdapterExtensions, -} from '@blocksuite/affine/inlines/preset'; -import { - type AttachmentBlockModel, - type BookmarkBlockModel, - type EmbedBlockModel, - type ImageBlockModel, - type TableBlockModel, - TableModelFlavour, -} from '@blocksuite/affine/model'; -import { MarkdownAdapter } from '@blocksuite/affine/shared/adapters'; -import type { AffineTextAttributes } from '@blocksuite/affine/shared/types'; -import { - createYProxy, - type DraftModel, - Transformer, - type TransformerMiddleware, - type YBlock, -} from '@blocksuite/affine/store'; -import type { DeltaInsert } from '@blocksuite/store'; -import { Document } from '@toeverything/infra'; -import { toHexString } from 'lib0/buffer.js'; -import { digest as lib0Digest } from 'lib0/hash/sha256'; -import { difference, uniq } from 'lodash-es'; -import { - applyUpdate, - Array as YArray, - Doc as YDoc, - Map as YMap, - Text as YText, -} from 'yjs'; - -import { getAFFiNEWorkspaceSchema } from '../../workspace/global-schema'; -import { WorkspaceImpl } from '../../workspace/impls/workspace'; -import type { BlockIndexSchema, DocIndexSchema } from '../schema'; -import type { - WorkerIngoingMessage, - WorkerInput, - WorkerOutgoingMessage, - WorkerOutput, -} from './types'; - -const blocksuiteSchema = getAFFiNEWorkspaceSchema(); - -const 
LRU_CACHE_SIZE = 5; - -// lru cache for ydoc instances, last used at the end of the array -const lruCache = [] as { doc: YDoc; hash: string }[]; - -async function digest(data: Uint8Array) { - if ( - globalThis.crypto && - globalThis.crypto.subtle && - typeof globalThis.crypto.subtle.digest === 'function' - ) { - return new Uint8Array( - await globalThis.crypto.subtle.digest('SHA-256', data) - ); - } - return lib0Digest(data); -} - -async function getOrCreateCachedYDoc(data: Uint8Array) { - try { - const hash = toHexString(await digest(data)); - const cachedIndex = lruCache.findIndex(item => item.hash === hash); - if (cachedIndex !== -1) { - const cached = lruCache.splice(cachedIndex, 1)[0]; - lruCache.push(cached); - return cached.doc; - } else { - const doc = new YDoc(); - if (!isEmptyUpdate(data)) { - applyUpdate(doc, data); - } - lruCache.push({ doc, hash }); - return doc; - } - } finally { - if (lruCache.length > LRU_CACHE_SIZE) { - lruCache.shift(); - } - } -} - -interface BlockDocumentInfo { - docId: string; - blockId: string; - content?: string | string[]; - flavour: string; - blob?: string[]; - refDocId?: string[]; - ref?: string[]; - parentFlavour?: string; - parentBlockId?: string; - additional?: { - databaseName?: string; - displayMode?: string; - noteBlockId?: string; - }; - yblock: YMap<any>; - markdownPreview?: string; -} - -const bookmarkFlavours = new Set([ - 'affine:bookmark', - 'affine:embed-youtube', - 'affine:embed-figma', - 'affine:embed-github', - 'affine:embed-loom', -]); - -const markdownPreviewDocCollection = new WorkspaceImpl({ - id: 'indexer', -}); - -function generateMarkdownPreviewBuilder( - yRootDoc: YDoc, - workspaceId: string, - blocks: BlockDocumentInfo[] -) { - function yblockToDraftModal(yblock: YBlock): DraftModel | null { - const flavour = yblock.get('sys:flavour') as string; - const blockSchema = blocksuiteSchema.flavourSchemaMap.get(flavour); - if (!blockSchema) { - return null; - } - const keys = Array.from(yblock.keys()) - .filter(key => key.startsWith('prop:')) - .map(key => key.substring(5)); - - const props = Object.fromEntries( - keys.map(key => [key, createYProxy(yblock.get(`prop:${key}`))]) - ); - - return { - props, - id: yblock.get('sys:id') as string, - flavour, - children: [], - role: blockSchema.model.role, - version: (yblock.get('sys:version') as number) ?? blockSchema.version, - keys: Array.from(yblock.keys()) - .filter(key => key.startsWith('prop:')) - .map(key => key.substring(5)), - } as unknown as DraftModel; - } - - const titleMiddleware: TransformerMiddleware = ({ adapterConfigs }) => { - const pages = yRootDoc.getMap('meta').get('pages'); - if (!(pages instanceof YArray)) { - return; - } - for (const meta of pages.toArray()) { - adapterConfigs.set( - 'title:' + meta.get('id'), - meta.get('title')?.toString() ?? 
'Untitled' - ); - } - }; - - const baseUrl = `/workspace/${workspaceId}`; - - function getDocLink(docId: string, blockId: string) { - const searchParams = new URLSearchParams(); - searchParams.set('blockIds', blockId); - return `${baseUrl}/${docId}?${searchParams.toString()}`; - } - - const docLinkBaseURLMiddleware: TransformerMiddleware = ({ - adapterConfigs, - }) => { - adapterConfigs.set('docLinkBaseUrl', baseUrl); - }; - - const container = new Container(); - [ - ...MarkdownInlineToDeltaAdapterExtensions, - ...defaultBlockMarkdownAdapterMatchers, - ...InlineDeltaToMarkdownAdapterExtensions, - ].forEach(ext => { - ext.setup(container); - }); - - const provider = container.provider(); - const markdownAdapter = new MarkdownAdapter( - new Transformer({ - schema: getAFFiNEWorkspaceSchema(), - blobCRUD: markdownPreviewDocCollection.blobSync, - docCRUD: { - create: (id: string) => - markdownPreviewDocCollection.createDoc(id).getStore({ id }), - get: (id: string) => - markdownPreviewDocCollection.getDoc(id)?.getStore({ id }) ?? null, - delete: (id: string) => markdownPreviewDocCollection.removeDoc(id), - }, - middlewares: [docLinkBaseURLMiddleware, titleMiddleware], - }), - provider - ); - - const markdownPreviewCache = new WeakMap(); - - function trimCodeBlock(markdown: string) { - const lines = markdown.split('\n').filter(line => line.trim() !== ''); - if (lines.length > 5) { - return [...lines.slice(0, 4), '...', lines.at(-1), ''].join('\n'); - } - return [...lines, ''].join('\n'); - } - - function trimParagraph(markdown: string) { - const lines = markdown.split('\n').filter(line => line.trim() !== ''); - - if (lines.length > 3) { - return [...lines.slice(0, 3), '...', lines.at(-1), ''].join('\n'); - } - - return [...lines, ''].join('\n'); - } - - function getListDepth(block: BlockDocumentInfo) { - let parentBlockCount = 0; - let currentBlock: BlockDocumentInfo | undefined = block; - do { - currentBlock = blocks.find( - b => b.blockId === currentBlock?.parentBlockId - ); - - // reach the root block. do not count it. 
- if (!currentBlock || currentBlock.flavour !== 'affine:list') { - break; - } - parentBlockCount++; - } while (currentBlock); - return parentBlockCount; - } - - // only works for list block - function indentMarkdown(markdown: string, depth: number) { - if (depth <= 0) { - return markdown; - } - - return ( - markdown - .split('\n') - .map(line => ' '.repeat(depth) + line) - .join('\n') + '\n' - ); - } - - const generateDatabaseMarkdownPreview = (block: BlockDocumentInfo) => { - const isDatabaseBlock = (block: BlockDocumentInfo) => { - return block.flavour === 'affine:database'; - }; - - const model = yblockToDraftModal(block.yblock); - - if (!model) { - return null; - } - - let dbBlock: BlockDocumentInfo | null = null; - - if (isDatabaseBlock(block)) { - dbBlock = block; - } else { - const parentBlock = blocks.find(b => b.blockId === block.parentBlockId); - - if (parentBlock && isDatabaseBlock(parentBlock)) { - dbBlock = parentBlock; - } - } - - if (!dbBlock) { - return null; - } - - const url = getDocLink(block.docId, dbBlock.blockId); - const title = dbBlock.additional?.databaseName; - - return `[database · ${title || 'Untitled'}][](${url})\n`; - }; - - const generateImageMarkdownPreview = (block: BlockDocumentInfo) => { - const isImageModel = ( - model: DraftModel | null - ): model is DraftModel<ImageBlockModel> => { - return model?.flavour === 'affine:image'; - }; - - const model = yblockToDraftModal(block.yblock); - - if (!isImageModel(model)) { - return null; - } - - const info = ['an image block']; - - if (model.props.sourceId) { - info.push(`file id ${model.props.sourceId}`); - } - - if (model.props.caption) { - info.push(`with caption ${model.props.caption}`); - } - - return info.join(', ') + '\n'; - }; - - const generateEmbedMarkdownPreview = (block: BlockDocumentInfo) => { - const isEmbedModel = ( - model: DraftModel | null - ): model is DraftModel<EmbedBlockModel> => { - return ( - model?.flavour === 'affine:embed-linked-doc' || - model?.flavour === 'affine:embed-synced-doc' - ); - }; - - const draftModel = yblockToDraftModal(block.yblock); - if (!isEmbedModel(draftModel)) { - return null; - } - - const url = getDocLink(block.docId, draftModel.id); - - return `[](${url})\n`; - }; - - const generateLatexMarkdownPreview = (block: BlockDocumentInfo) => { - let content = - typeof block.content === 'string' - ? block.content.trim() - : block.content?.join('').trim(); - - content = content?.split('\n').join(' ') ?? ''; - - return `LaTeX, with value ${content}\n`; - }; - - const generateBookmarkMarkdownPreview = (block: BlockDocumentInfo) => { - const isBookmarkModel = ( - model: DraftModel | null - ): model is DraftModel<BookmarkBlockModel> => { - return bookmarkFlavours.has(model?.flavour ?? 
''); - }; - - const draftModel = yblockToDraftModal(block.yblock); - if (!isBookmarkModel(draftModel)) { - return null; - } - const title = draftModel.props.title; - const url = draftModel.props.url; - return `[${title}](${url})\n`; - }; - - const generateAttachmentMarkdownPreview = (block: BlockDocumentInfo) => { - const isAttachmentModel = ( - model: DraftModel | null - ): model is DraftModel<AttachmentBlockModel> => { - return model?.flavour === 'affine:attachment'; - }; - - const draftModel = yblockToDraftModal(block.yblock); - if (!isAttachmentModel(draftModel)) { - return null; - } - - return `[${draftModel.props.name}](${draftModel.props.sourceId})\n`; - }; - - const generateTableMarkdownPreview = (block: BlockDocumentInfo) => { - const isTableModel = ( - model: DraftModel | null - ): model is DraftModel<TableBlockModel> => { - return model?.flavour === TableModelFlavour; - }; - - const draftModel = yblockToDraftModal(block.yblock); - if (!isTableModel(draftModel)) { - return null; - } - - const url = getDocLink(block.docId, draftModel.id); - - return `[table][](${url})\n`; - }; - - const generateMarkdownPreview = async (block: BlockDocumentInfo) => { - if (markdownPreviewCache.has(block)) { - return markdownPreviewCache.get(block); - } - const flavour = block.flavour; - let markdown: string | null = null; - - if ( - flavour === 'affine:paragraph' || - flavour === 'affine:list' || - flavour === 'affine:code' - ) { - const draftModel = yblockToDraftModal(block.yblock); - markdown = - block.parentFlavour === 'affine:database' - ? generateDatabaseMarkdownPreview(block) - : ((draftModel ? await markdownAdapter.fromBlock(draftModel) : null) - ?.file ?? null); - - if (markdown) { - if (flavour === 'affine:code') { - markdown = trimCodeBlock(markdown); - } else if (flavour === 'affine:paragraph') { - markdown = trimParagraph(markdown); - } - } - } else if (flavour === 'affine:database') { - markdown = generateDatabaseMarkdownPreview(block); - } else if ( - flavour === 'affine:embed-linked-doc' || - flavour === 'affine:embed-synced-doc' - ) { - markdown = generateEmbedMarkdownPreview(block); - } else if (flavour === 'affine:attachment') { - markdown = generateAttachmentMarkdownPreview(block); - } else if (flavour === 'affine:image') { - markdown = generateImageMarkdownPreview(block); - } else if (flavour === 'affine:surface' || flavour === 'affine:page') { - // skip - } else if (flavour === 'affine:latex') { - markdown = generateLatexMarkdownPreview(block); - } else if (bookmarkFlavours.has(flavour)) { - markdown = generateBookmarkMarkdownPreview(block); - } else if (flavour === TableModelFlavour) { - markdown = generateTableMarkdownPreview(block); - } else { - console.warn(`unknown flavour: ${flavour}`); - } - - if (markdown && flavour === 'affine:list') { - const blockDepth = getListDepth(block); - markdown = indentMarkdown(markdown, Math.max(0, blockDepth)); - } - - markdownPreviewCache.set(block, markdown); - return markdown; - }; - - return generateMarkdownPreview; -} - -// remove the indent of the first line of list -// e.g., -// ``` -//   - list item 1 -//   - list item 2 -// ``` -// becomes -// ``` -// - list item 1 -//   - list item 2 -// ``` -function unindentMarkdown(markdown: string) { - const lines = markdown.split('\n'); - const res: string[] = []; - let firstListFound = false; - let baseIndent = 0; - - for (let current of lines) { - const indent = current.match(/^\s*/)?.[0]?.length ?? 
0; - - if (indent > 0) { - if (!firstListFound) { - // For the first list item, remove all indentation - firstListFound = true; - baseIndent = indent; - current = current.trimStart(); - } else { - // For subsequent list items, maintain relative indentation - current = - ' '.repeat(Math.max(0, indent - baseIndent)) + current.trimStart(); - } - } - - res.push(current); - } - - return res.join('\n'); -} - -async function crawlingDocData({ - docBuffer, - docId, - rootDocBuffer, - rootDocId, -}: WorkerInput & { type: 'doc' }): Promise<WorkerOutput> { - if (isEmptyUpdate(rootDocBuffer)) { - console.warn('[worker]: Empty root doc buffer'); - return {}; - } - - const yRootDoc = await getOrCreateCachedYDoc(rootDocBuffer); - - let docExists: boolean | null = null; - - ( - yRootDoc.getMap('meta').get('pages') as YArray<YMap<any>> | undefined - )?.forEach(page => { - if (page.get('id') === docId) { - docExists = !(page.get('trash') ?? false); - } - }); - - if (!docExists) { - return { - deletedDoc: [docId], - }; - } else { - if (isEmptyUpdate(docBuffer)) { - return { - deletedDoc: [docId], - }; - } - - const ydoc = await getOrCreateCachedYDoc(docBuffer); - let docTitle = ''; - let summaryLenNeeded = 1000; - let summary = ''; - const blockDocuments: BlockDocumentInfo[] = []; - - const generateMarkdownPreview = generateMarkdownPreviewBuilder( - yRootDoc, - rootDocId, - blockDocuments - ); - - const blocks = ydoc.getMap('blocks'); - - // build a parent map for quick lookup - // for each block, record its parent id - const parentMap: Record<string, string> = {}; - for (const [id, block] of blocks.entries()) { - const children = block.get('sys:children') as YArray<string> | undefined; - if (children instanceof YArray && children.length) { - for (const child of children) { - parentMap[child] = id; - } - } - } - - if (blocks.size === 0) { - return { deletedDoc: [docId] }; - } - - // find the nearest block that satisfies the predicate - const nearest = ( - blockId: string, - predicate: (block: YMap<any>) => boolean - ) => { - let current: string | null = blockId; - while (current) { - const block = blocks.get(current); - if (block && predicate(block)) { - return block; - } - current = parentMap[current] ?? null; - } - return null; - }; - - const nearestByFlavour = (blockId: string, flavour: string) => - nearest(blockId, block => block.get('sys:flavour') === flavour); - - let rootBlockId: string | null = null; - for (const block of blocks.values()) { - const flavour = block.get('sys:flavour')?.toString(); - const blockId = block.get('sys:id')?.toString(); - if (flavour === 'affine:page' && blockId) { - rootBlockId = blockId; - } - } - - if (!rootBlockId) { - return { deletedDoc: [docId] }; - } - - const queue: { parent?: string; id: string }[] = [{ id: rootBlockId }]; - const visited = new Set<string>(); // avoid loop - - const pushChildren = (id: string, block: YMap<any>) => { - const children = block.get('sys:children'); - if (children instanceof YArray && children.length) { - for (let i = children.length - 1; i >= 0; i--) { - const childId = children.get(i); - if (childId && !visited.has(childId)) { - queue.push({ parent: id, id: childId }); - visited.add(childId); - } - } - } - }; - - // #region first loop - generate block base info - while (queue.length) { - const next = queue.pop(); - if (!next) { - break; - } - - const { parent: parentBlockId, id: blockId } = next; - const block = blockId ? blocks.get(blockId) : null; - const parentBlock = parentBlockId ? 
blocks.get(parentBlockId) : null; - if (!block) { - break; - } - - const flavour = block.get('sys:flavour')?.toString(); - const parentFlavour = parentBlock?.get('sys:flavour')?.toString(); - const noteBlock = nearestByFlavour(blockId, 'affine:note'); - - // display mode: - // - both: page and edgeless -> fallback to page - // - page: only page -> page - // - edgeless: only edgeless -> edgeless - // - undefined: edgeless (assuming it is a normal element on the edgeless) - let displayMode = noteBlock?.get('prop:displayMode') ?? 'edgeless'; - - if (displayMode === 'both') { - displayMode = 'page'; - } - - const noteBlockId: string | undefined = noteBlock - ?.get('sys:id') - ?.toString(); - - pushChildren(blockId, block); - - const commonBlockProps = { - docId, - flavour, - blockId, - yblock: block, - additional: { displayMode, noteBlockId }, - }; - - if (flavour === 'affine:page') { - docTitle = block.get('prop:title').toString(); - blockDocuments.push({ - ...commonBlockProps, - content: docTitle, - }); - } else if ( - flavour === 'affine:paragraph' || - flavour === 'affine:list' || - flavour === 'affine:code' - ) { - const text = block.get('prop:text') as YText; - - if (!text) { - continue; - } - - const deltas: DeltaInsert<AffineTextAttributes>[] = text.toDelta(); - const refs = uniq( - deltas - .flatMap(delta => { - if ( - delta.attributes && - delta.attributes.reference && - delta.attributes.reference.pageId - ) { - const { pageId: refDocId, params = {} } = - delta.attributes.reference; - return { - refDocId, - ref: JSON.stringify({ docId: refDocId, ...params }), - }; - } - return null; - }) - .filter(ref => !!ref) - ); - - const databaseName = - flavour === 'affine:paragraph' && parentFlavour === 'affine:database' // if block is a database row - ? parentBlock?.get('prop:title')?.toString() - : undefined; - - blockDocuments.push({ - ...commonBlockProps, - content: text.toString(), - ...refs.reduce<{ refDocId: string[]; ref: string[] }>( - (prev, curr) => { - prev.refDocId.push(curr.refDocId); - prev.ref.push(curr.ref); - return prev; - }, - { refDocId: [], ref: [] } - ), - parentFlavour, - parentBlockId, - additional: { ...commonBlockProps.additional, databaseName }, - }); - - if (summaryLenNeeded > 0) { - summary += text.toString(); - summaryLenNeeded -= text.length; - } - } else if ( - flavour === 'affine:embed-linked-doc' || - flavour === 'affine:embed-synced-doc' - ) { - const pageId = block.get('prop:pageId'); - if (typeof pageId === 'string') { - // reference info - const params = block.get('prop:params') ?? 
{}; - blockDocuments.push({ - ...commonBlockProps, - refDocId: [pageId], - ref: [JSON.stringify({ docId: pageId, ...params })], - parentFlavour, - parentBlockId, - }); - } - } else if ( - flavour === 'affine:attachment' || - flavour === 'affine:image' - ) { - const blobId = block.get('prop:sourceId'); - if (typeof blobId === 'string') { - blockDocuments.push({ - ...commonBlockProps, - blob: [blobId], - parentFlavour, - parentBlockId, - }); - } - } else if (flavour === 'affine:surface') { - const texts = []; - - const elementsObj = block.get('prop:elements'); - if ( - !( - elementsObj instanceof YMap && - elementsObj.get('type') === '$blocksuite:internal:native$' - ) - ) { - continue; - } - const elements = elementsObj.get('value') as YMap<any>; - if (!(elements instanceof YMap)) { - continue; - } - - for (const element of elements.values()) { - if (!(element instanceof YMap)) { - continue; - } - const text = element.get('text') as YText; - if (!text) { - continue; - } - - texts.push(text.toString()); - } - - blockDocuments.push({ - ...commonBlockProps, - content: texts, - parentFlavour, - parentBlockId, - }); - } else if (flavour === 'affine:database') { - const texts = []; - const columnsObj = block.get('prop:columns'); - const databaseTitle = block.get('prop:title'); - if (databaseTitle instanceof YText) { - texts.push(databaseTitle.toString()); - } - if (columnsObj instanceof YArray) { - for (const column of columnsObj) { - if (!(column instanceof YMap)) { - continue; - } - if (typeof column.get('name') === 'string') { - texts.push(column.get('name')); - } - - const data = column.get('data'); - if (!(data instanceof YMap)) { - continue; - } - const options = data.get('options'); - if (!(options instanceof YArray)) { - continue; - } - for (const option of options) { - if (!(option instanceof YMap)) { - continue; - } - const value = option.get('value'); - if (typeof value === 'string') { - texts.push(value); - } - } - } - } - - blockDocuments.push({ - ...commonBlockProps, - content: texts, - additional: { - ...commonBlockProps.additional, - databaseName: databaseTitle?.toString(), - }, - }); - } else if (flavour === 'affine:latex') { - blockDocuments.push({ - ...commonBlockProps, - content: block.get('prop:latex')?.toString() ?? '', - }); - } else if (flavour === TableModelFlavour) { - const contents = Array.from(block.keys()) - .map(key => { - if (key.startsWith('prop:cells.') && key.endsWith('.text')) { - return block.get(key)?.toString() ?? ''; - } - return ''; - }) - .filter(Boolean); - blockDocuments.push({ - ...commonBlockProps, - content: contents, - }); - } else if (bookmarkFlavours.has(flavour)) { - blockDocuments.push({ - ...commonBlockProps, - }); - } - } - // #endregion - - // #region second loop - generate markdown preview - const TARGET_PREVIEW_CHARACTER = 500; - const TARGET_PREVIOUS_BLOCK = 1; - const TARGET_FOLLOW_BLOCK = 4; - for (const block of blockDocuments) { - if (block.ref?.length) { - const target = block; - - // should only generate the markdown preview belonging to the same affine:note - const noteBlock = nearestByFlavour(block.blockId, 'affine:note'); - - const sameNoteBlocks = noteBlock - ? blockDocuments.filter( - candidate => - nearestByFlavour(candidate.blockId, 'affine:note') === noteBlock - ) - : []; - - // only generate markdown preview for reference blocks - let previewText = (await generateMarkdownPreview(target)) ?? 
''; - let previousBlock = 0; - let followBlock = 0; - let previousIndex = sameNoteBlocks.findIndex( - block => block.blockId === target.blockId - ); - let followIndex = previousIndex; - - while ( - !( - ( - previewText.length > TARGET_PREVIEW_CHARACTER || // stop if preview text reaches the limit - ((previousBlock >= TARGET_PREVIOUS_BLOCK || previousIndex < 0) && - (followBlock >= TARGET_FOLLOW_BLOCK || - followIndex >= sameNoteBlocks.length)) - ) // stop if no more blocks, or preview block reaches the limit - ) - ) { - if (previousBlock < TARGET_PREVIOUS_BLOCK) { - previousIndex--; - const block = - previousIndex >= 0 ? sameNoteBlocks.at(previousIndex) : null; - const markdown = block - ? await generateMarkdownPreview(block) - : null; - if ( - markdown && - !previewText.startsWith( - markdown - ) /* A small hack to skip blocks with the same content */ - ) { - previewText = markdown + '\n' + previewText; - previousBlock++; - } - } - - if (followBlock < TARGET_FOLLOW_BLOCK) { - followIndex++; - const block = sameNoteBlocks.at(followIndex); - const markdown = block - ? await generateMarkdownPreview(block) - : null; - if ( - markdown && - !previewText.endsWith( - markdown - ) /* A small hack to skip blocks with the same content */ - ) { - previewText = previewText + '\n' + markdown; - followBlock++; - } - } - } - - block.markdownPreview = unindentMarkdown(previewText); - } - } - // #endregion - - return { - addedDoc: [ - { - id: docId, - doc: Document.from<DocIndexSchema>(docId, { - docId, - title: docTitle, - summary, - }), - blocks: blockDocuments.map(block => - Document.from<BlockIndexSchema>(`${docId}:${block.blockId}`, { - docId: block.docId, - blockId: block.blockId, - content: block.content, - flavour: block.flavour, - blob: block.blob, - refDocId: block.refDocId, - ref: block.ref, - parentFlavour: block.parentFlavour, - parentBlockId: block.parentBlockId, - additional: block.additional - ? JSON.stringify(block.additional) - : undefined, - markdownPreview: block.markdownPreview, - }) - ), - }, - ], - }; - } -} - -async function crawlingRootDocData({ - allIndexedDocs, - rootDocBuffer, - reindexAll, -}: WorkerInput & { - type: 'rootDoc'; -}): Promise<WorkerOutput> { - const ydoc = await getOrCreateCachedYDoc(rootDocBuffer); - - const docs = ydoc.getMap('meta').get('pages') as - | YArray<YMap<any>> - | undefined; - - if (!docs) { - return {}; - } - - const availableDocs = []; - - for (const page of docs) { - const docId = page.get('id'); - - if (typeof docId !== 'string') { - continue; - } - - const inTrash = page.get('trash') ?? false; - - if (!inTrash) { - availableDocs.push(docId); - } - } - - const needDelete = difference(allIndexedDocs, availableDocs); - const needAdd = reindexAll - ? availableDocs - : difference(availableDocs, allIndexedDocs); - - return { - reindexDoc: [...needAdd, ...needDelete].map(docId => ({ - docId, - storageDocId: ydoc.getMap('spaces').get(docId)?.guid ?? docId, - })), - }; -} - -globalThis.onmessage = async (event: MessageEvent<WorkerIngoingMessage>) => { - const message = event.data; - if (message.type === 'init') { - postMessage({ type: 'init', msgId: message.msgId }); - return; - } - if (message.type === 'run') { - const { input } = message; - try { - let data; - if (input.type === 'rootDoc') { - data = await crawlingRootDocData(input); - } else { - data = await crawlingDocData(input); - } - - postMessage({ type: 'done', msgId: message.msgId, output: data }); - } catch (error) { - postMessage({ - type: 'failed', - msgId: message.msgId, - error: error instanceof Error ? 
error.message : error + '', - }); - } - } -}; - -declare function postMessage(message: WorkerOutgoingMessage): void; - -function isEmptyUpdate(binary: Uint8Array) { - return ( - binary.byteLength === 0 || - (binary.byteLength === 2 && binary[0] === 0 && binary[1] === 0) - ); -} diff --git a/packages/frontend/core/src/modules/docs-search/worker/out-worker.ts b/packages/frontend/core/src/modules/docs-search/worker/out-worker.ts deleted file mode 100644 index 8d1acee8a8..0000000000 --- a/packages/frontend/core/src/modules/docs-search/worker/out-worker.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { DebugLogger } from '@affine/debug'; -import { MANUALLY_STOP, throwIfAborted } from '@toeverything/infra'; - -import type { - WorkerIngoingMessage, - WorkerInput, - WorkerOutgoingMessage, - WorkerOutput, -} from './types'; - -const logger = new DebugLogger('affine:indexer-worker'); - -export async function createWorker(abort: AbortSignal) { - let worker: Worker | null = null; - while (throwIfAborted(abort)) { - try { - worker = await new Promise<Worker>((resolve, reject) => { - // @TODO(@forehalo): need to make a general worker - const worker = new Worker( - /* webpackChunkName: "worker" */ new URL( - './in-worker.ts', - import.meta.url - ) - ); - worker.addEventListener('error', reject); - worker.addEventListener('message', event => { - if (event.data.type === 'init') { - resolve(worker); - } - }); - worker.postMessage({ type: 'init', msgId: 0 } as WorkerIngoingMessage); - - setTimeout(() => { - reject('timeout'); - }, 1000 * 30 /* 30 sec */); - }); - } catch (err) { - logger.debug( - `Indexer worker init failed, ${err}, will retry in 5 seconds.` - ); - await new Promise(resolve => setTimeout(resolve, 5000)); - } - if (worker) { - break; - } - } - - if (!worker) { - // never reach here - throw new Error('Worker is not created'); - } - - const terminateAbort = new AbortController(); - - let msgId = 1; - - return { - run: async (input: WorkerInput) => { - const dispose: (() => void)[] = []; - return new Promise<WorkerOutput>((resolve, reject) => { - const currentMsgId = msgId++; - const msgHandler = (event: MessageEvent<WorkerOutgoingMessage>) => { - if (event.data.msgId === currentMsgId) { - if (event.data.type === 'done') { - resolve(event.data.output); - } else if (event.data.type === 'failed') { - reject(new Error(event.data.error)); - } else { - reject(new Error('Unknown message type')); - } - } - }; - const abortHandler = (reason: any) => { - reject(reason); - }; - - worker.addEventListener('message', msgHandler); - dispose.push(() => { - worker?.removeEventListener('message', msgHandler); - }); - - terminateAbort.signal.addEventListener('abort', abortHandler); - dispose.push(() => { - terminateAbort.signal.removeEventListener('abort', abortHandler); - }); - - worker.postMessage({ - type: 'run', - msgId: currentMsgId, - input, - } as WorkerIngoingMessage); - }).finally(() => { - for (const d of dispose) { - d(); - } - }); - }, - dispose: () => { - terminateAbort.abort(MANUALLY_STOP); - worker.terminate(); - }, - }; -} - -export type IndexerWorker = Awaited<ReturnType<typeof createWorker>>; diff --git a/packages/frontend/core/src/modules/docs-search/worker/types.ts b/packages/frontend/core/src/modules/docs-search/worker/types.ts deleted file mode 100644 index 566b252c37..0000000000 --- a/packages/frontend/core/src/modules/docs-search/worker/types.ts +++ /dev/null @@ -1,53 +0,0 @@ -import type { Document } from '@toeverything/infra'; - -import type { BlockIndexSchema, DocIndexSchema } from '../schema'; - -export type WorkerIngoingMessage = ( - | { - type: 'init'; - } - | { 
diff --git a/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx b/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx
index 3dad699c5b..136002d98e 100644
--- a/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx
+++ b/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx
@@ -19,6 +19,7 @@ import { useI18n } from '@affine/i18n';
 import { track } from '@affine/track';
 import {
   LiveData,
+  MANUALLY_STOP,
   useLiveData,
   useService,
   useServices,
@@ -98,19 +99,23 @@ export const ExplorerDocNode = ({
   );
 
   const searching = children === null;
-  const indexerLoading = useLiveData(
-    docsSearchService.indexer.status$.map(
-      v => v.remaining === undefined || v.remaining > 0
-    )
-  );
   const [referencesLoading, setReferencesLoading] = useState(true);
   useLayoutEffect(() => {
-    setReferencesLoading(
-      prev =>
-        prev &&
-        indexerLoading /* after loading becomes false, it never becomes true */
-    );
-  }, [indexerLoading]);
+    const abortController = new AbortController();
+    docsSearchService.indexer
+      .waitForDocCompletedWithPriority(docId, 100, abortController.signal)
+      .then(() => {
+        setReferencesLoading(false);
+      })
+      .catch(err => {
+        if (err !== MANUALLY_STOP) {
+          console.error(err);
+        }
+      });
+    return () => {
+      abortController.abort(MANUALLY_STOP);
+    };
+  }, [docId, docsSearchService]);
 
   const dndData = useMemo(() => {
     return {
diff --git a/packages/frontend/core/src/modules/quicksearch/impls/docs.ts b/packages/frontend/core/src/modules/quicksearch/impls/docs.ts
index d2ebacdb03..c162913d69 100644
--- a/packages/frontend/core/src/modules/quicksearch/impls/docs.ts
+++ b/packages/frontend/core/src/modules/quicksearch/impls/docs.ts
@@ -33,10 +33,11 @@ export class DocsQuickSearchSession
     super();
   }
 
-  private readonly isIndexerLoading$ =
-    this.docsSearchService.indexer.status$.map(({ remaining }) => {
-      return remaining === undefined || remaining > 0;
-    });
+  private readonly isIndexerLoading$ = this.docsSearchService.indexerState$.map(
+    ({ completed }) => {
+      return !completed;
+    }
+  );
 
   private readonly isQueryLoading$ = new LiveData(false);
 
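Both hunks above replace the old `status$.remaining` counter with the new `indexerState$` stream, whose `completed` flag flips once the crawler has drained its backlog. A sketch of deriving a loading signal from it, assuming `indexerState$` is a LiveData emitting `{ completed: boolean }` as the DocsQuickSearchSession hunk suggests; the real state type may carry more fields.

import type { LiveData } from '@toeverything/infra';

// Assumed shape of the emitted state.
interface IndexerState {
  completed: boolean;
}

// Loading whenever indexing has not completed; mirrors the hunk above.
function isIndexerLoading$(state$: LiveData<IndexerState>): LiveData<boolean> {
  return state$.map(({ completed }) => !completed);
}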
diff --git a/packages/frontend/core/src/modules/search-menu/services/index.ts b/packages/frontend/core/src/modules/search-menu/services/index.ts
index 76d104d7cc..03e8c63979 100644
--- a/packages/frontend/core/src/modules/search-menu/services/index.ts
+++ b/packages/frontend/core/src/modules/search-menu/services/index.ts
@@ -17,7 +17,7 @@ import { cssVarV2 } from '@toeverything/theme/v2';
 import Fuse, { type FuseResultMatch } from 'fuse.js';
 import { html } from 'lit';
 import { unsafeHTML } from 'lit/directives/unsafe-html.js';
-import { map, takeWhile } from 'rxjs';
+import { map } from 'rxjs';
 
 import type { CollectionService } from '../../collection';
 import type { DocDisplayMetaService } from '../../doc-display-meta';
@@ -119,15 +119,6 @@ export class SearchMenuService extends Service {
       []
     );
 
-    const { signal: isIndexerLoading, cleanup: cleanupIndexerLoading } =
-      createSignalFromObservable(
-        this.docsSearch.indexer.status$.pipe(
-          map(status => status.remaining !== undefined && status.remaining > 0),
-          takeWhile(isLoading => isLoading, true)
-        ),
-        false
-      );
-
     const overflowText = computed(() => {
       const overflowCount = docsSignal.value.length - MAX_DOCS;
       return I18n.t('com.affine.editor.at-menu.more-docs-hint', {
@@ -137,14 +128,12 @@ export class SearchMenuService extends Service {
 
     abortSignal.addEventListener('abort', () => {
       cleanupDocs();
-      cleanupIndexerLoading();
     });
 
     return {
       name: I18n.t('com.affine.editor.at-menu.link-to-doc', {
        query,
      }),
-      loading: isIndexerLoading,
      items: docsSignal,
      maxDisplay: MAX_DOCS,
      overflowText,
@@ -153,8 +142,9 @@ export class SearchMenuService extends Service {
 
   // only search docs by title, excluding blocks
   private searchDocs$(query: string) {
-    return this.docsSearch.indexer.docIndex
+    return this.docsSearch.indexer
       .aggregate$(
+        'doc',
         {
           type: 'boolean',
           occur: 'must',
diff --git a/packages/frontend/core/src/modules/workspace-engine/impls/cloud.ts b/packages/frontend/core/src/modules/workspace-engine/impls/cloud.ts
index 8e58005426..586777f2a3 100644
--- a/packages/frontend/core/src/modules/workspace-engine/impls/cloud.ts
+++ b/packages/frontend/core/src/modules/workspace-engine/impls/cloud.ts
@@ -460,6 +460,22 @@ class CloudWorkspaceFlavourProvider implements WorkspaceFlavourProvider {
             id: `${this.flavour}:${workspaceId}`,
           },
         },
+        indexer: {
+          name: 'IndexedDBIndexerStorage',
+          opts: {
+            flavour: this.flavour,
+            type: 'workspace',
+            id: workspaceId,
+          },
+        },
+        indexerSync: {
+          name: 'IndexedDBIndexerSyncStorage',
+          opts: {
+            flavour: this.flavour,
+            type: 'workspace',
+            id: workspaceId,
+          },
+        },
       },
       remotes: {
         [`cloud:${this.flavour}`]: {
diff --git a/packages/frontend/core/src/modules/workspace-engine/impls/local.ts b/packages/frontend/core/src/modules/workspace-engine/impls/local.ts
index ce1266c6be..cebdbb352d 100644
--- a/packages/frontend/core/src/modules/workspace-engine/impls/local.ts
+++ b/packages/frontend/core/src/modules/workspace-engine/impls/local.ts
@@ -349,6 +349,22 @@ class LocalWorkspaceFlavourProvider implements WorkspaceFlavourProvider {
             id: workspaceId,
           },
         },
+        indexer: {
+          name: 'IndexedDBIndexerStorage',
+          opts: {
+            flavour: this.flavour,
+            type: 'workspace',
+            id: workspaceId,
+          },
+        },
+        indexerSync: {
+          name: 'IndexedDBIndexerSyncStorage',
+          opts: {
+            flavour: this.flavour,
+            type: 'workspace',
+            id: workspaceId,
+          },
+        },
       },
       remotes: {
         v1: {
diff --git a/packages/frontend/core/src/modules/workspace/entities/engine.ts b/packages/frontend/core/src/modules/workspace/entities/engine.ts
index 9a423ee89a..575ccdbc4b 100644
--- a/packages/frontend/core/src/modules/workspace/entities/engine.ts
+++ b/packages/frontend/core/src/modules/workspace/entities/engine.ts
@@ -36,6 +36,13 @@ export class WorkspaceEngine extends Entity<{
     return this.client.blobFrontend;
   }
 
+  get indexer() {
+    if (!this.client) {
+      throw new Error('Engine is not initialized');
+    }
+    return this.client.indexerFrontend;
+  }
+
   get awareness() {
     if (!this.client) {
       throw new Error('Engine is not initialized');
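The new `WorkspaceEngine.indexer` getter above hands callers the client's `indexerFrontend`, which the ExplorerDocNode hunk drives via `waitForDocCompletedWithPriority`. A hedged sketch of that call pattern follows; the structural interface is inferred from the call sites in this patch, not a published signature.

import { MANUALLY_STOP } from '@toeverything/infra';

// Inferred from the call site: docId, priority, optional abort signal.
interface IndexerFrontendLike {
  waitForDocCompletedWithPriority(
    docId: string,
    priority: number,
    signal?: AbortSignal
  ): Promise<void>;
}

async function waitForDocIndexed(
  indexer: IndexerFrontendLike,
  docId: string,
  signal?: AbortSignal
): Promise<void> {
  try {
    // 100 is the priority the ExplorerDocNode hunk uses.
    await indexer.waitForDocCompletedWithPriority(docId, 100, signal);
  } catch (err) {
    // A deliberate abort is signalled with MANUALLY_STOP; not an error.
    if (err !== MANUALLY_STOP) throw err;
  }
}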
diff --git a/tools/utils/src/workspace.gen.ts b/tools/utils/src/workspace.gen.ts
index af97671c99..443585cd1a 100644
--- a/tools/utils/src/workspace.gen.ts
+++ b/tools/utils/src/workspace.gen.ts
@@ -903,6 +903,7 @@ export const PackageList = [
       'packages/common/infra',
       'packages/common/error',
       'packages/common/graphql',
+      'blocksuite/affine/all',
     ],
   },
   {
diff --git a/yarn.lock b/yarn.lock
index e1e8bad4f5..8f95beb159 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -808,11 +808,14 @@ __metadata:
   dependencies:
     "@affine/error": "workspace:*"
     "@affine/graphql": "workspace:*"
+    "@blocksuite/affine": "workspace:*"
     "@datastructures-js/binary-search-tree": "npm:^5.3.2"
     "@toeverything/infra": "workspace:*"
     eventemitter2: "npm:^6.4.9"
     fake-indexeddb: "npm:^6.0.0"
+    graphemer: "npm:^1.4.0"
     idb: "npm:^8.0.0"
+    lib0: "npm:^0.2.99"
     lodash-es: "npm:^4.17.21"
     nanoid: "npm:^5.0.9"
     rxjs: "npm:^7.8.1"
@@ -824,6 +827,7 @@ __metadata:
   peerDependencies:
     "@affine/error": "workspace:*"
     "@affine/graphql": "workspace:*"
+    "@blocksuite/affine": "workspace:*"
     idb: ^8.0.0
     socket.io-client: ^4.7.5
   languageName: unknown
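For reference, the two indexer storage entries registered identically by the cloud and local providers above, extracted into one sketch; the `buildIndexerStorageOpts` helper and its parameters are hypothetical, while the `name`/`opts` payloads are taken verbatim from the hunks.

// Hypothetical helper producing the registrations added in cloud.ts and
// local.ts; only the entry shapes come from the patch.
function buildIndexerStorageOpts(flavour: string, workspaceId: string) {
  const opts = { flavour, type: 'workspace' as const, id: workspaceId };
  return {
    indexer: { name: 'IndexedDBIndexerStorage', opts },
    indexerSync: { name: 'IndexedDBIndexerSyncStorage', opts },
  };
}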