feat(infra): collect more data to indexer (#8528)

This commit is contained in:
EYHN
2024-10-19 20:22:26 +08:00
committed by GitHub
parent 8f92be926b
commit 01c3a3b4c0
11 changed files with 341 additions and 169 deletions

View File

@@ -1,6 +1,10 @@
import { DocLinksService } from '@affine/core/modules/doc-link';
import {
type Backlink,
DocLinksService,
type Link,
} from '@affine/core/modules/doc-link';
import { useI18n } from '@affine/i18n';
import { useLiveData, useServices } from '@toeverything/infra';
import { LiveData, useLiveData, useServices } from '@toeverything/infra';
import { useCallback, useState } from 'react';
import { AffinePageReference } from '../../affine/reference-link';
@@ -13,8 +17,12 @@ export const BiDirectionalLinkPanel = () => {
});
const t = useI18n();
const links = useLiveData(docLinksService.links.links$);
const backlinks = useLiveData(docLinksService.backlinks.backlinks$);
const links = useLiveData(
show ? docLinksService.links.links$ : new LiveData([] as Link[])
);
const backlinks = useLiveData(
show ? docLinksService.backlinks.backlinks$ : new LiveData([] as Backlink[])
);
const handleClickShow = useCallback(() => {
setShow(!show);
}, [show]);

View File

@@ -103,7 +103,10 @@ export const DocPropertiesTableHeader = ({
EditorSettingService,
});
const docBacklinks = docLinksService.backlinks;
const backlinks = useLiveData(docBacklinks.backlinks$);
const backlinks = useMemo(
() => docBacklinks.backlinks$.value,
[docBacklinks]
);
const displayDocInfo = useLiveData(
editorSettingService.editorSetting.settings$.selector(s => s.displayDocInfo)

View File

@@ -36,7 +36,7 @@ export class DocsIndexer extends Entity {
/**
* increase this number to re-index all docs
*/
static INDEXER_VERSION = 2;
static INDEXER_VERSION = 5;
private readonly jobQueue: JobQueue<IndexerJobPayload> =
new IndexedDBJobQueue<IndexerJobPayload>(

View File

@@ -2,6 +2,9 @@ import { defineSchema } from '@toeverything/infra';
/**
 * Index schema for document-level records (one record per doc).
 *
 * The indexer worker fills `title` and `summary` for each crawled doc.
 */
export const docIndexSchema = defineSchema({
// title of the doc; declared as 'FullText'
// NOTE(review): presumably this makes the title full-text searchable — confirm against `defineSchema`
title: 'FullText',
// summary of the doc, used for preview
// built by the crawler from block text: whole blocks are appended until about
// 1000 characters have been collected, so the stored value may run past 1000
// NOTE(review): `index: false` presumably means stored but not searchable — confirm
summary: { type: 'String', index: false },
// NOTE(review): meaning of `journal` is not visible here — presumably a journal marker/date for the doc; confirm
journal: 'String',
});
// Type alias for the schema value, used as the type argument of `Document.from<DocIndexSchema>(...)`
export type DocIndexSchema = typeof docIndexSchema;
@@ -15,9 +18,16 @@ export const blockIndexSchema = defineSchema({
// reference doc id
// ['xxx','yyy']
refDocId: 'String',
// reference info
// reference info, used for backlink to specific block
// [{"docId":"xxx","mode":"page","blockIds":["gt5Yfq1maYvgNgpi13rIq"]},{"docId":"yyy","mode":"edgeless","blockIds":["k5prpOlDF-9CzfatmO0W7"]}]
ref: 'String',
ref: { type: 'String', index: false },
// parent block flavour
parentFlavour: 'String',
// parent block id
parentBlockId: 'String',
// additional info
// { "databaseName": "xxx" }
additional: { type: 'String', index: false },
});
export type BlockIndexSchema = typeof blockIndexSchema;

View File

@@ -9,10 +9,10 @@ import {
Array as YArray,
Doc as YDoc,
Map as YMap,
type Text as YText,
Text as YText,
} from 'yjs';
import type { BlockIndexSchema, docIndexSchema } from '../schema';
import type { BlockIndexSchema, DocIndexSchema } from '../schema';
import type {
WorkerIngoingMessage,
WorkerInput,
@@ -68,12 +68,6 @@ async function crawlingDocData({
return {};
}
const ydoc = new YDoc();
if (!isEmptyUpdate(docBuffer)) {
applyUpdate(ydoc, docBuffer);
}
let docExists: boolean | null = null;
(
@@ -89,23 +83,68 @@ async function crawlingDocData({
deletedDoc: [docId],
};
} else {
const ydoc = new YDoc();
let docTitle = '';
let summaryLenNeeded = 1000;
let summary = '';
const blockDocuments: Document<BlockIndexSchema>[] = [];
if (!isEmptyUpdate(docBuffer)) {
applyUpdate(ydoc, docBuffer);
}
const blocks = ydoc.getMap<any>('blocks');
if (blocks.size === 0) {
return {};
return { deletedDoc: [docId] };
}
let docTitle = '';
const blockDocuments: Document<BlockIndexSchema>[] = [];
let rootBlockId: string | null = null;
for (const block of blocks.values()) {
const flavour = block.get('sys:flavour')?.toString();
const blockId = block.get('sys:id')?.toString();
if (!flavour || !blockId) {
continue;
if (flavour === 'affine:page' && blockId) {
rootBlockId = blockId;
}
}
if (!rootBlockId) {
return { deletedDoc: [docId] };
}
const queue: { parent?: string; id: string }[] = [{ id: rootBlockId }];
const visited = new Set<string>(); // avoid loop
const pushChildren = (id: string, block: YMap<any>) => {
const children = block.get('sys:children');
if (children instanceof YArray && children.length) {
for (let i = children.length - 1; i >= 0; i--) {
const childId = children.get(i);
if (childId && !visited.has(childId)) {
queue.push({ parent: id, id: childId });
visited.add(childId);
}
}
}
};
while (queue.length) {
const next = queue.pop();
if (!next) {
break;
}
const { parent: parentBlockId, id: blockId } = next;
const block = blockId ? blocks.get(blockId) : null;
const parentBlock = parentBlockId ? blocks.get(parentBlockId) : null;
if (!block) {
break;
}
const flavour = block.get('sys:flavour')?.toString();
const parentFlavour = parentBlock?.get('sys:flavour')?.toString();
pushChildren(blockId, block);
if (flavour === 'affine:page') {
docTitle = block.get('prop:title').toString();
@@ -150,6 +189,11 @@ async function crawlingDocData({
.filter(ref => !!ref)
);
const databaseName =
flavour === 'affine:paragraph' && parentFlavour === 'affine:database' // if block is a database row
? parentBlock?.get('prop:title')?.toString()
: undefined;
blockDocuments.push(
Document.from<BlockIndexSchema>(`${docId}:${blockId}`, {
docId,
@@ -164,8 +208,18 @@ async function crawlingDocData({
},
{ refDocId: [], ref: [] }
),
parentFlavour,
parentBlockId,
additional: databaseName
? JSON.stringify({ databaseName })
: undefined,
})
);
if (summaryLenNeeded > 0) {
summary += text.toString();
summaryLenNeeded -= text.length;
}
}
if (
@@ -183,6 +237,8 @@ async function crawlingDocData({
blockId,
refDocId: [pageId],
ref: [JSON.stringify({ docId: pageId, ...params })],
parentFlavour,
parentBlockId,
})
);
}
@@ -197,6 +253,8 @@ async function crawlingDocData({
flavour,
blockId,
blob: [blobId],
parentFlavour,
parentBlockId,
})
);
}
@@ -237,6 +295,8 @@ async function crawlingDocData({
flavour,
blockId,
content: texts,
parentFlavour,
parentBlockId,
})
);
}
@@ -244,32 +304,35 @@ async function crawlingDocData({
if (flavour === 'affine:database') {
const texts = [];
const columnsObj = block.get('prop:columns');
if (!(columnsObj instanceof YArray)) {
continue;
const databaseTitle = block.get('prop:title');
if (databaseTitle instanceof YText) {
texts.push(databaseTitle.toString());
}
for (const column of columnsObj) {
if (!(column instanceof YMap)) {
continue;
}
if (typeof column.get('name') === 'string') {
texts.push(column.get('name'));
}
const data = column.get('data');
if (!(data instanceof YMap)) {
continue;
}
const options = data.get('options');
if (!(options instanceof YArray)) {
continue;
}
for (const option of options) {
if (!(option instanceof YMap)) {
if (columnsObj instanceof YArray) {
for (const column of columnsObj) {
if (!(column instanceof YMap)) {
continue;
}
const value = option.get('value');
if (typeof value === 'string') {
texts.push(value);
if (typeof column.get('name') === 'string') {
texts.push(column.get('name'));
}
const data = column.get('data');
if (!(data instanceof YMap)) {
continue;
}
const options = data.get('options');
if (!(options instanceof YArray)) {
continue;
}
for (const option of options) {
if (!(option instanceof YMap)) {
continue;
}
const value = option.get('value');
if (typeof value === 'string') {
texts.push(value);
}
}
}
}
@@ -289,8 +352,9 @@ async function crawlingDocData({
addedDoc: [
{
id: docId,
doc: Document.from<typeof docIndexSchema>(docId, {
doc: Document.from<DocIndexSchema>(docId, {
title: docTitle,
summary,
}),
blocks: blockDocuments,
},

View File

@@ -22,6 +22,7 @@ import {
useServices,
} from '@toeverything/infra';
import { useCallback, useLayoutEffect, useMemo, useState } from 'react';
import { NEVER } from 'rxjs';
import { ExplorerTreeNode, type ExplorerTreeNodeDropEffect } from '../../tree';
import type { GenericExplorerNode } from '../types';
@@ -82,10 +83,15 @@ export const ExplorerDocNode = ({
const children = useLiveData(
useMemo(
() => LiveData.from(docsSearchService.watchRefsFrom(docId), null),
[docsSearchService, docId]
() =>
LiveData.from(
!collapsed ? docsSearchService.watchRefsFrom(docId) : NEVER,
null
),
[docsSearchService, docId, collapsed]
)
);
const searching = children === null;
const indexerLoading = useLiveData(
docsSearchService.indexer.status$.map(
@@ -231,7 +237,9 @@ export const ExplorerDocNode = ({
}
reorderable={reorderable}
onRename={handleRename}
childrenPlaceholder={<Empty onDrop={handleDropOnPlaceholder} />}
childrenPlaceholder={
searching ? null : <Empty onDrop={handleDropOnPlaceholder} />
}
operations={finalOperations}
dropEffect={handleDropEffectOnDoc}
data-testid={`explorer-doc-${docId}`}