diff --git a/packages/common/infra/src/sync/indexer/document.ts b/packages/common/infra/src/sync/indexer/document.ts index 5983800b33..d21397e8d3 100644 --- a/packages/common/infra/src/sync/indexer/document.ts +++ b/packages/common/infra/src/sync/indexer/document.ts @@ -40,7 +40,10 @@ export class Document { } } else { for (const key in map) { - doc.insert(key, map[key] as string | string[]); + if (map[key] === undefined) { + continue; + } + doc.insert(key, map[key]); } } return doc; diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts b/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts index f418bd0674..2fd5c031a3 100644 --- a/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts +++ b/packages/common/infra/src/sync/indexer/impl/indexeddb/data-struct.ts @@ -1,3 +1,4 @@ +import { DebugLogger } from '@affine/debug'; import { type DBSchema, type IDBPDatabase, @@ -25,6 +26,8 @@ import { } from './inverted-index'; import { Match } from './match'; +const logger = new DebugLogger('indexeddb'); + export interface IndexDB extends DBSchema { kvMetadata: { key: string; @@ -75,14 +78,19 @@ export class DataStruct { constructor( readonly databaseName: string, - schema: Schema + readonly schema: Schema ) { for (const [key, type] of Object.entries(schema)) { - if (type === 'String') { + const typeInfo = typeof type === 'string' ? { type } : type; + if (typeInfo.index === false) { + // If index is false, we don't need to create an inverted index for this field. + continue; + } + if (typeInfo.type === 'String') { this.invertedIndex.set(key, new StringInvertedIndex(key)); - } else if (type === 'Integer') { + } else if (typeInfo.type === 'Integer') { this.invertedIndex.set(key, new IntegerInvertedIndex(key)); - } else if (type === 'FullText') { + } else if (typeInfo.type === 'FullText') { this.invertedIndex.set(key, new FullTextInvertedIndex(key)); } else if (type === 'Boolean') { this.invertedIndex.set(key, new BooleanInvertedIndex(key)); @@ -102,17 +110,29 @@ export class DataStruct { throw new Error('Document already exists'); } + const dataMap = new Map(); + + for (const [key, values] of document.fields) { + const type = this.schema[key as string]; + if (!type) { + return; + } + const typeInfo = typeof type === 'string' ? { type } : type; + if (typeInfo.store !== false) { + // If store is false, the field will not be stored + dataMap.set(key, values); + } + } + const nid = await trx.objectStore('records').add({ id: document.id, - data: new Map(document.fields as Map), + data: dataMap, }); for (const [key, values] of document.fields) { const iidx = this.invertedIndex.get(key as string); if (!iidx) { - throw new Error( - `Inverted index '${key.toString()}' not found, document not match schema` - ); + return; } await iidx.insert(trx, nid, values); } @@ -164,7 +184,7 @@ export class DataStruct { if (query.type === 'match') { const iidx = this.invertedIndex.get(query.field as string); if (!iidx) { - throw new Error(`Field '${query.field as string}' not found`); + return new Match(); } return await iidx.match(trx, query.match); } else if (query.type === 'boolean') { @@ -187,7 +207,7 @@ export class DataStruct { } else if (query.type === 'exists') { const iidx = this.invertedIndex.get(query.field as string); if (!iidx) { - throw new Error(`Field '${query.field as string}' not found`); + return new Match(); } return await iidx.all(trx); } @@ -217,31 +237,41 @@ export class DataStruct { query: Query, options: SearchOptions ): Promise> { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 100, - }; + const startTime = performance.now(); + try { + const pagination = { + skip: options.pagination?.skip ?? 0, + limit: options.pagination?.limit ?? 100, + }; - const match = await this.query(trx, query); + const match = await this.query(trx, query); - const nids = match - .toArray() - .slice(pagination.skip, pagination.skip + pagination.limit); + const nids = match + .toArray() + .slice(pagination.skip, pagination.skip + pagination.limit); - const nodes = []; - for (const nid of nids) { - nodes.push(await this.resultNode(trx, match, nid, options)); + const nodes = []; + for (const nid of nids) { + nodes.push(await this.resultNode(trx, match, nid, options)); + } + + return { + pagination: { + count: match.size(), + hasMore: match.size() > pagination.limit + pagination.skip, + limit: pagination.limit, + skip: pagination.skip, + }, + nodes: nodes, + }; + } finally { + logger.debug( + `[indexer ${this.databaseName}] search`, + performance.now() - startTime, + 'ms', + query + ); } - - return { - pagination: { - count: match.size(), - hasMore: match.size() > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - nodes: nodes, - }; } async aggregate( @@ -250,95 +280,105 @@ export class DataStruct { field: string, options: AggregateOptions ): Promise> { - const pagination = { - skip: options.pagination?.skip ?? 0, - limit: options.pagination?.limit ?? 100, - }; + const startTime = performance.now(); + try { + const pagination = { + skip: options.pagination?.skip ?? 0, + limit: options.pagination?.limit ?? 100, + }; - const hitPagination = options.hits - ? { - skip: options.hits.pagination?.skip ?? 0, - limit: options.hits.pagination?.limit ?? 3, - } - : { - skip: 0, - limit: 0, - }; + const hitPagination = options.hits + ? { + skip: options.hits.pagination?.skip ?? 0, + limit: options.hits.pagination?.limit ?? 3, + } + : { + skip: 0, + limit: 0, + }; - const match = await this.query(trx, query); + const match = await this.query(trx, query); - const nids = match.toArray(); + const nids = match.toArray(); - const buckets: { - key: string; - nids: number[]; - hits: SearchResult['nodes']; - }[] = []; + const buckets: { + key: string; + nids: number[]; + hits: SearchResult['nodes']; + }[] = []; - for (const nid of nids) { - const values = (await trx.objectStore('records').get(nid))?.data.get( - field - ); - for (const value of values ?? []) { - let bucket; - let bucketIndex = buckets.findIndex(b => b.key === value); - if (bucketIndex === -1) { - bucket = { key: value, nids: [], hits: [] }; - buckets.push(bucket); - bucketIndex = buckets.length - 1; - } else { - bucket = buckets[bucketIndex]; - } + for (const nid of nids) { + const values = (await trx.objectStore('records').get(nid))?.data.get( + field + ); + for (const value of values ?? []) { + let bucket; + let bucketIndex = buckets.findIndex(b => b.key === value); + if (bucketIndex === -1) { + bucket = { key: value, nids: [], hits: [] }; + buckets.push(bucket); + bucketIndex = buckets.length - 1; + } else { + bucket = buckets[bucketIndex]; + } - if ( - bucketIndex >= pagination.skip && - bucketIndex < pagination.skip + pagination.limit - ) { - bucket.nids.push(nid); if ( - bucket.nids.length - 1 >= hitPagination.skip && - bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit + bucketIndex >= pagination.skip && + bucketIndex < pagination.skip + pagination.limit ) { - bucket.hits.push( - await this.resultNode(trx, match, nid, options.hits ?? {}) - ); + bucket.nids.push(nid); + if ( + bucket.nids.length - 1 >= hitPagination.skip && + bucket.nids.length - 1 < hitPagination.skip + hitPagination.limit + ) { + bucket.hits.push( + await this.resultNode(trx, match, nid, options.hits ?? {}) + ); + } } } } + + return { + buckets: buckets + .slice(pagination.skip, pagination.skip + pagination.limit) + .map(bucket => { + const result = { + key: bucket.key, + score: match.getScore(bucket.nids[0]), + count: bucket.nids.length, + } as AggregateResult['buckets'][number]; + + if (options.hits) { + (result as any).hits = { + pagination: { + count: bucket.nids.length, + hasMore: + bucket.nids.length > + hitPagination.limit + hitPagination.skip, + limit: hitPagination.limit, + skip: hitPagination.skip, + }, + nodes: bucket.hits, + } as SearchResult; + } + + return result; + }), + pagination: { + count: buckets.length, + hasMore: buckets.length > pagination.limit + pagination.skip, + limit: pagination.limit, + skip: pagination.skip, + }, + }; + } finally { + logger.debug( + `[indexer ${this.databaseName}] aggregate`, + performance.now() - startTime, + 'ms' + ); } - - return { - buckets: buckets - .slice(pagination.skip, pagination.skip + pagination.limit) - .map(bucket => { - const result = { - key: bucket.key, - score: match.getScore(bucket.nids[0]), - count: bucket.nids.length, - } as AggregateResult['buckets'][number]; - - if (options.hits) { - (result as any).hits = { - pagination: { - count: bucket.nids.length, - hasMore: - bucket.nids.length > hitPagination.limit + hitPagination.skip, - limit: hitPagination.limit, - skip: hitPagination.skip, - }, - nodes: bucket.hits, - } as SearchResult; - } - - return result; - }), - pagination: { - count: buckets.length, - hasMore: buckets.length > pagination.limit + pagination.skip, - limit: pagination.limit, - skip: pagination.skip, - }, - }; } async getAll( diff --git a/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts b/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts index d94393ae1a..4230e18dfc 100644 --- a/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts +++ b/packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts @@ -21,7 +21,11 @@ export interface InvertedIndex { } export class StringInvertedIndex implements InvertedIndex { - constructor(readonly fieldKey: string) {} + constructor( + readonly fieldKey: string, + readonly index: boolean = true, + readonly store: boolean = true + ) {} async match(trx: DataStructROTransaction, term: string): Promise { const objs = await trx @@ -69,7 +73,11 @@ export class StringInvertedIndex implements InvertedIndex { } export class IntegerInvertedIndex implements InvertedIndex { - constructor(readonly fieldKey: string) {} + constructor( + readonly fieldKey: string, + readonly index: boolean = true, + readonly store: boolean = true + ) {} async match(trx: DataStructROTransaction, term: string): Promise { const objs = await trx @@ -118,7 +126,11 @@ export class IntegerInvertedIndex implements InvertedIndex { } export class BooleanInvertedIndex implements InvertedIndex { - constructor(readonly fieldKey: string) {} + constructor( + readonly fieldKey: string, + readonly index: boolean = true, + readonly store: boolean = true + ) {} // eslint-disable-next-line sonarjs/no-identical-functions async all(trx: DataStructROTransaction): Promise { @@ -172,7 +184,11 @@ export class BooleanInvertedIndex implements InvertedIndex { } export class FullTextInvertedIndex implements InvertedIndex { - constructor(readonly fieldKey: string) {} + constructor( + readonly fieldKey: string, + readonly index: boolean = true, + readonly store: boolean = true + ) {} async match(trx: DataStructROTransaction, term: string): Promise { const queryTokens = new GeneralTokenizer().tokenize(term); diff --git a/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts b/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts index ffa150b531..96a77812f7 100644 --- a/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts +++ b/packages/common/infra/src/sync/indexer/impl/memory/data-struct.ts @@ -31,13 +31,15 @@ export class DataStruct { constructor(schema: Schema) { for (const [key, type] of Object.entries(schema)) { - if (type === 'String') { + const typeInfo = typeof type === 'string' ? { type } : type; + + if (typeInfo.type === 'String') { this.invertedIndex.set(key, new StringInvertedIndex(key)); - } else if (type === 'Integer') { + } else if (typeInfo.type === 'Integer') { this.invertedIndex.set(key, new IntegerInvertedIndex(key)); - } else if (type === 'FullText') { + } else if (typeInfo.type === 'FullText') { this.invertedIndex.set(key, new FullTextInvertedIndex(key)); - } else if (type === 'Boolean') { + } else if (typeInfo.type === 'Boolean') { this.invertedIndex.set(key, new BooleanInvertedIndex(key)); } else { throw new Error(`Field type '${type}' not supported`); diff --git a/packages/common/infra/src/sync/indexer/schema.ts b/packages/common/infra/src/sync/indexer/schema.ts index dd534c48a4..891d8a891b 100644 --- a/packages/common/infra/src/sync/indexer/schema.ts +++ b/packages/common/infra/src/sync/indexer/schema.ts @@ -1,6 +1,24 @@ import type { FieldType } from './field-type'; -export type Schema = Record; +export type Schema = Record< + string, + | FieldType + | { + type: FieldType; + /** + * If false, the field will not be indexed, and thus not searchable. + * + * default: true + */ + index?: boolean; + /** + * If false, the field will not be stored, and not included in the search result. + * + * default: true + */ + store?: boolean; + } +>; export function defineSchema(schema: T): T { return schema; diff --git a/packages/frontend/core/src/components/blocksuite/block-suite-editor/bi-directional-link-panel.tsx b/packages/frontend/core/src/components/blocksuite/block-suite-editor/bi-directional-link-panel.tsx index 9c201c988a..891156687f 100644 --- a/packages/frontend/core/src/components/blocksuite/block-suite-editor/bi-directional-link-panel.tsx +++ b/packages/frontend/core/src/components/blocksuite/block-suite-editor/bi-directional-link-panel.tsx @@ -1,6 +1,10 @@ -import { DocLinksService } from '@affine/core/modules/doc-link'; +import { + type Backlink, + DocLinksService, + type Link, +} from '@affine/core/modules/doc-link'; import { useI18n } from '@affine/i18n'; -import { useLiveData, useServices } from '@toeverything/infra'; +import { LiveData, useLiveData, useServices } from '@toeverything/infra'; import { useCallback, useState } from 'react'; import { AffinePageReference } from '../../affine/reference-link'; @@ -13,8 +17,12 @@ export const BiDirectionalLinkPanel = () => { }); const t = useI18n(); - const links = useLiveData(docLinksService.links.links$); - const backlinks = useLiveData(docLinksService.backlinks.backlinks$); + const links = useLiveData( + show ? docLinksService.links.links$ : new LiveData([] as Link[]) + ); + const backlinks = useLiveData( + show ? docLinksService.backlinks.backlinks$ : new LiveData([] as Backlink[]) + ); const handleClickShow = useCallback(() => { setShow(!show); }, [show]); diff --git a/packages/frontend/core/src/components/doc-properties/table.tsx b/packages/frontend/core/src/components/doc-properties/table.tsx index 8a3eded0a1..4edc0f26e9 100644 --- a/packages/frontend/core/src/components/doc-properties/table.tsx +++ b/packages/frontend/core/src/components/doc-properties/table.tsx @@ -103,7 +103,10 @@ export const DocPropertiesTableHeader = ({ EditorSettingService, }); const docBacklinks = docLinksService.backlinks; - const backlinks = useLiveData(docBacklinks.backlinks$); + const backlinks = useMemo( + () => docBacklinks.backlinks$.value, + [docBacklinks] + ); const displayDocInfo = useLiveData( editorSettingService.editorSetting.settings$.selector(s => s.displayDocInfo) diff --git a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts b/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts index 23c2e93fcf..19a99e4825 100644 --- a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts +++ b/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts @@ -36,7 +36,7 @@ export class DocsIndexer extends Entity { /** * increase this number to re-index all docs */ - static INDEXER_VERSION = 2; + static INDEXER_VERSION = 5; private readonly jobQueue: JobQueue = new IndexedDBJobQueue( diff --git a/packages/frontend/core/src/modules/docs-search/schema.ts b/packages/frontend/core/src/modules/docs-search/schema.ts index 5e401ab90a..ec395320a4 100644 --- a/packages/frontend/core/src/modules/docs-search/schema.ts +++ b/packages/frontend/core/src/modules/docs-search/schema.ts @@ -2,6 +2,9 @@ import { defineSchema } from '@toeverything/infra'; export const docIndexSchema = defineSchema({ title: 'FullText', + // summary of the doc, used for preview + summary: { type: 'String', index: false }, + journal: 'String', }); export type DocIndexSchema = typeof docIndexSchema; @@ -15,9 +18,16 @@ export const blockIndexSchema = defineSchema({ // reference doc id // ['xxx','yyy'] refDocId: 'String', - // reference info + // reference info, used for backlink to specific block // [{"docId":"xxx","mode":"page","blockIds":["gt5Yfq1maYvgNgpi13rIq"]},{"docId":"yyy","mode":"edgeless","blockIds":["k5prpOlDF-9CzfatmO0W7"]}] - ref: 'String', + ref: { type: 'String', index: false }, + // parent block flavour + parentFlavour: 'String', + // parent block id + parentBlockId: 'String', + // additional info + // { "databaseName": "xxx" } + additional: { type: 'String', index: false }, }); export type BlockIndexSchema = typeof blockIndexSchema; diff --git a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts b/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts index 06b6462d70..21d03eccd1 100644 --- a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts +++ b/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts @@ -9,10 +9,10 @@ import { Array as YArray, Doc as YDoc, Map as YMap, - type Text as YText, + Text as YText, } from 'yjs'; -import type { BlockIndexSchema, docIndexSchema } from '../schema'; +import type { BlockIndexSchema, DocIndexSchema } from '../schema'; import type { WorkerIngoingMessage, WorkerInput, @@ -68,12 +68,6 @@ async function crawlingDocData({ return {}; } - const ydoc = new YDoc(); - - if (!isEmptyUpdate(docBuffer)) { - applyUpdate(ydoc, docBuffer); - } - let docExists: boolean | null = null; ( @@ -89,23 +83,68 @@ async function crawlingDocData({ deletedDoc: [docId], }; } else { + const ydoc = new YDoc(); + let docTitle = ''; + let summaryLenNeeded = 1000; + let summary = ''; + const blockDocuments: Document[] = []; + + if (!isEmptyUpdate(docBuffer)) { + applyUpdate(ydoc, docBuffer); + } + const blocks = ydoc.getMap('blocks'); if (blocks.size === 0) { - return {}; + return { deletedDoc: [docId] }; } - let docTitle = ''; - - const blockDocuments: Document[] = []; - + let rootBlockId: string | null = null; for (const block of blocks.values()) { const flavour = block.get('sys:flavour')?.toString(); const blockId = block.get('sys:id')?.toString(); - - if (!flavour || !blockId) { - continue; + if (flavour === 'affine:page' && blockId) { + rootBlockId = blockId; } + } + + if (!rootBlockId) { + return { deletedDoc: [docId] }; + } + + const queue: { parent?: string; id: string }[] = [{ id: rootBlockId }]; + const visited = new Set(); // avoid loop + + const pushChildren = (id: string, block: YMap) => { + const children = block.get('sys:children'); + if (children instanceof YArray && children.length) { + for (let i = children.length - 1; i >= 0; i--) { + const childId = children.get(i); + if (childId && !visited.has(childId)) { + queue.push({ parent: id, id: childId }); + visited.add(childId); + } + } + } + }; + + while (queue.length) { + const next = queue.pop(); + if (!next) { + break; + } + + const { parent: parentBlockId, id: blockId } = next; + const block = blockId ? blocks.get(blockId) : null; + const parentBlock = parentBlockId ? blocks.get(parentBlockId) : null; + if (!block) { + break; + } + + const flavour = block.get('sys:flavour')?.toString(); + const parentFlavour = parentBlock?.get('sys:flavour')?.toString(); + + pushChildren(blockId, block); if (flavour === 'affine:page') { docTitle = block.get('prop:title').toString(); @@ -150,6 +189,11 @@ async function crawlingDocData({ .filter(ref => !!ref) ); + const databaseName = + flavour === 'affine:paragraph' && parentFlavour === 'affine:database' // if block is a database row + ? parentBlock?.get('prop:title')?.toString() + : undefined; + blockDocuments.push( Document.from(`${docId}:${blockId}`, { docId, @@ -164,8 +208,18 @@ async function crawlingDocData({ }, { refDocId: [], ref: [] } ), + parentFlavour, + parentBlockId, + additional: databaseName + ? JSON.stringify({ databaseName }) + : undefined, }) ); + + if (summaryLenNeeded > 0) { + summary += text.toString(); + summaryLenNeeded -= text.length; + } } if ( @@ -183,6 +237,8 @@ async function crawlingDocData({ blockId, refDocId: [pageId], ref: [JSON.stringify({ docId: pageId, ...params })], + parentFlavour, + parentBlockId, }) ); } @@ -197,6 +253,8 @@ async function crawlingDocData({ flavour, blockId, blob: [blobId], + parentFlavour, + parentBlockId, }) ); } @@ -237,6 +295,8 @@ async function crawlingDocData({ flavour, blockId, content: texts, + parentFlavour, + parentBlockId, }) ); } @@ -244,32 +304,35 @@ async function crawlingDocData({ if (flavour === 'affine:database') { const texts = []; const columnsObj = block.get('prop:columns'); - if (!(columnsObj instanceof YArray)) { - continue; + const databaseTitle = block.get('prop:title'); + if (databaseTitle instanceof YText) { + texts.push(databaseTitle.toString()); } - for (const column of columnsObj) { - if (!(column instanceof YMap)) { - continue; - } - if (typeof column.get('name') === 'string') { - texts.push(column.get('name')); - } - - const data = column.get('data'); - if (!(data instanceof YMap)) { - continue; - } - const options = data.get('options'); - if (!(options instanceof YArray)) { - continue; - } - for (const option of options) { - if (!(option instanceof YMap)) { + if (columnsObj instanceof YArray) { + for (const column of columnsObj) { + if (!(column instanceof YMap)) { continue; } - const value = option.get('value'); - if (typeof value === 'string') { - texts.push(value); + if (typeof column.get('name') === 'string') { + texts.push(column.get('name')); + } + + const data = column.get('data'); + if (!(data instanceof YMap)) { + continue; + } + const options = data.get('options'); + if (!(options instanceof YArray)) { + continue; + } + for (const option of options) { + if (!(option instanceof YMap)) { + continue; + } + const value = option.get('value'); + if (typeof value === 'string') { + texts.push(value); + } } } } @@ -289,8 +352,9 @@ async function crawlingDocData({ addedDoc: [ { id: docId, - doc: Document.from(docId, { + doc: Document.from(docId, { title: docTitle, + summary, }), blocks: blockDocuments, }, diff --git a/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx b/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx index d86b2f5f4c..4df22ea9d1 100644 --- a/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx +++ b/packages/frontend/core/src/modules/explorer/views/nodes/doc/index.tsx @@ -22,6 +22,7 @@ import { useServices, } from '@toeverything/infra'; import { useCallback, useLayoutEffect, useMemo, useState } from 'react'; +import { NEVER } from 'rxjs'; import { ExplorerTreeNode, type ExplorerTreeNodeDropEffect } from '../../tree'; import type { GenericExplorerNode } from '../types'; @@ -82,10 +83,15 @@ export const ExplorerDocNode = ({ const children = useLiveData( useMemo( - () => LiveData.from(docsSearchService.watchRefsFrom(docId), null), - [docsSearchService, docId] + () => + LiveData.from( + !collapsed ? docsSearchService.watchRefsFrom(docId) : NEVER, + null + ), + [docsSearchService, docId, collapsed] ) ); + const searching = children === null; const indexerLoading = useLiveData( docsSearchService.indexer.status$.map( @@ -231,7 +237,9 @@ export const ExplorerDocNode = ({ } reorderable={reorderable} onRename={handleRename} - childrenPlaceholder={} + childrenPlaceholder={ + searching ? null : + } operations={finalOperations} dropEffect={handleDropEffectOnDoc} data-testid={`explorer-doc-${docId}`}