feat: database indexing support (#14181)

This commit is contained in:
DarkSky
2025-12-30 05:23:09 +08:00
committed by GitHub
parent 95a5e941e7
commit ff2e96d847
7 changed files with 339 additions and 194 deletions

View File

@@ -1,11 +1,6 @@
import { FactoryProvider, Injectable, Logger } from '@nestjs/common';
import { ModuleRef } from '@nestjs/core';
import {
applyUpdate,
diffUpdate,
Doc as YDoc,
encodeStateVectorFromUpdate,
} from 'yjs';
import { diffUpdate, encodeStateVectorFromUpdate } from 'yjs';
import {
Cache,
@@ -48,16 +43,14 @@ export abstract class DocReader {
protected readonly blobStorage: WorkspaceBlobStorage
) {}
// keep methods to allow test mocking
/**
 * Parses a document binary into page content by delegating to `parsePageDoc`.
 *
 * @param bin - document binary to parse
 * @param maxSummaryLength - passed to `parsePageDoc` as the `maxSummaryLength` option; defaults to 150
 * @returns the value produced by `parsePageDoc`
 */
parseDocContent(bin: Uint8Array, maxSummaryLength = 150) {
// NOTE(review): the first three statements build a YDoc from `bin` and return, while the
// final return hands `bin` straight to `parsePageDoc`. This view appears to show pre- and
// post-change lines together — confirm which variant is the live one.
const doc = new YDoc();
applyUpdate(doc, bin);
return parsePageDoc(doc, { maxSummaryLength });
return parsePageDoc(bin, { maxSummaryLength });
}
// keep methods to allow test mocking
/**
 * Parses a workspace document binary by delegating to `parseWorkspaceDoc`.
 *
 * @param bin - workspace document binary to parse
 * @returns the value produced by `parseWorkspaceDoc`
 */
parseWorkspaceContent(bin: Uint8Array) {
// NOTE(review): one return passes a YDoc built from `bin`, the other passes `bin` itself.
// This view appears to show pre- and post-change lines together — confirm which is live.
const doc = new YDoc();
applyUpdate(doc, bin);
return parseWorkspaceDoc(doc);
return parseWorkspaceDoc(bin);
}
abstract getDoc(

View File

@@ -1,6 +1,6 @@
import { Array as YArray, Doc as YDoc, Map as YMap } from 'yjs';
import {
parsePageDocFromBinary,
parseWorkspaceDocFromBinary,
parseYDocFromBinary,
parseYDocToMarkdown,
readAllDocIdsFromRootDoc,
@@ -16,144 +16,26 @@ export interface WorkspaceDocContent {
avatarKey: string;
}
/**
 * String-literal union of the block flavour identifiers that the block walk in
 * `parsePageDoc` compares a block's `sys:flavour` value against.
 */
type KnownFlavour =
| 'affine:page'
| 'affine:note'
| 'affine:surface'
| 'affine:paragraph'
| 'affine:list'
| 'affine:code'
| 'affine:image'
| 'affine:attachment'
| 'affine:transcription'
| 'affine:callout'
| 'affine:table';
// NOTE(review): this span shows two variants of `parseWorkspaceDoc` and of `ParsePageOptions`
// interleaved (braces do not balance as written) — presumably the removed and added sides of
// a diff; confirm which declarations are current before relying on either.

/**
 * YDoc-based variant: reads the workspace name and avatar key from the doc's `meta` map.
 * Returns null when the doc has no `meta` entry in `doc.share`.
 */
export function parseWorkspaceDoc(doc: YDoc): WorkspaceDocContent | null {
// not a workspace doc
if (!doc.share.has('meta')) {
return null;
}
const meta = doc.getMap('meta');
return {
name: meta.get('name') as string,
// the map key is 'avatar' although the returned field is named `avatarKey`
avatarKey: meta.get('avatar') as string,
};
/** Options for `parsePageDoc`; in this variant `maxSummaryLength` may be omitted. */
export interface ParsePageOptions {
maxSummaryLength?: number;
}
/** Options for `parsePageDoc`; in this variant `maxSummaryLength` is required. */
export interface ParsePageOptions {
maxSummaryLength: number;
/**
 * Binary-based variant: copies the snapshot into a Buffer, passes it to
 * `parseWorkspaceDocFromBinary`, and maps a nullish result to null.
 */
export function parseWorkspaceDoc(
snapshot: Uint8Array
): WorkspaceDocContent | null {
return parseWorkspaceDocFromBinary(Buffer.from(snapshot)) ?? null;
}
/**
 * Extracts a page's title and a text summary.
 *
 * NOTE(review): two implementations appear interleaved here — a block walk over a YDoc
 * (`doc`) ending in `return content;`, followed by a delegation to `parsePageDocFromBinary`
 * on `docSnapshot`. As written the second return is unreachable and both `doc` and
 * `docSnapshot` are declared; presumably these are the removed and added sides of a diff —
 * confirm which is current.
 *
 * @param opts - `maxSummaryLength` is the summary budget; the walk treats -1 as
 *   "full content mode" (no limit, and extra block kinds are included). Defaults to 150.
 * @returns the page content, or null when the doc has no `blocks` map, the map is empty,
 *   or no `affine:page` block is found
 */
export function parsePageDoc(
doc: YDoc,
docSnapshot: Uint8Array,
opts: ParsePageOptions = { maxSummaryLength: 150 }
): PageDocContent | null {
// not a page doc
if (!doc.share.has('blocks')) {
return null;
}
const blocks = doc.getMap<YMap<any>>('blocks');
if (!blocks.size) {
return null;
}
const content: PageDocContent = {
title: '',
summary: '',
};
let summaryLenNeeded = opts.maxSummaryLength;
let root: YMap<any> | null = null;
// Scan every block for the page block; the loop does not stop at the first match, so the
// last `affine:page` block visited supplies the title and becomes the root.
for (const block of blocks.values()) {
const flavour = block.get('sys:flavour') as KnownFlavour;
if (flavour === 'affine:page') {
content.title = block.get('prop:title') as string;
root = block;
}
}
if (!root) {
return null;
}
// `queue` is used as a stack: ids are pushed and popped from the end.
const queue: string[] = [root.get('sys:id')];
// Pushes child ids from last to first so that popping visits them first to last.
function pushChildren(block: YMap<any>) {
const children = block.get('sys:children') as YArray<string> | undefined;
if (children?.length) {
for (let i = children.length - 1; i >= 0; i--) {
queue.push(children.get(i));
}
}
}
while (queue.length) {
const blockId = queue.pop();
const block = blockId ? blocks.get(blockId) : null;
// a falsy id or an id missing from `blocks` ends the whole walk, not just this entry
if (!block) {
break;
}
const flavour = block.get('sys:flavour') as KnownFlavour;
// flavours with no case below (e.g. 'affine:surface', 'affine:image') are skipped
// without visiting their children
switch (flavour) {
case 'affine:page':
case 'affine:note': {
pushChildren(block);
break;
}
case 'affine:attachment':
case 'affine:transcription':
case 'affine:callout': {
// only extract text in full content mode
if (summaryLenNeeded === -1) {
pushChildren(block);
}
break;
}
case 'affine:table': {
// only extract text in full content mode
if (summaryLenNeeded === -1) {
// collect the values stored under keys of the form `prop:cells.….text`, drop empty
// strings, and append them to the summary joined by '|'
const contents: string[] = [...block.keys()]
.map(key => {
if (key.startsWith('prop:cells.') && key.endsWith('.text')) {
return block.get(key)?.toString() ?? '';
}
return '';
})
.filter(Boolean);
content.summary += contents.join('|');
}
break;
}
case 'affine:paragraph':
case 'affine:list':
case 'affine:code': {
pushChildren(block);
const text = block.get('prop:text');
if (!text) {
// `continue` applies to the enclosing while loop: move on to the next queued id
continue;
}
if (summaryLenNeeded === -1) {
content.summary += text.toString();
} else if (summaryLenNeeded > 0) {
// the whole text is appended even if it is longer than the remaining budget, so the
// summary can overshoot and the budget can go negative
content.summary += text.toString();
summaryLenNeeded -= text.length;
} else {
// budget used up: this `break` leaves only the switch; the while loop keeps draining
// the queue without adding more text
break;
}
}
}
}
return content;
// `opts?.` together with `?? 150` falls back to 150 when `opts` or its
// `maxSummaryLength` is nullish; a nullish parser result is mapped to null
return (
parsePageDocFromBinary(
Buffer.from(docSnapshot),
opts?.maxSummaryLength ?? 150
) ?? null
);
}
export function readAllDocIdsFromWorkspaceSnapshot(snapshot: Uint8Array) {

View File

@@ -42,6 +42,8 @@ export const parseDoc = serverNativeModule.parseDoc;
// Re-exports of `serverNativeModule` members; several are exposed under a different local
// name than the property they come from (e.g. `parsePageDocFromBinary` is
// `serverNativeModule.parsePageDoc`).
export const htmlSanitize = serverNativeModule.htmlSanitize;
export const parseYDocFromBinary = serverNativeModule.parseDocFromBinary;
export const parseYDocToMarkdown = serverNativeModule.parseDocToMarkdown;
export const parsePageDocFromBinary = serverNativeModule.parsePageDoc;
export const parseWorkspaceDocFromBinary = serverNativeModule.parseWorkspaceDoc;
export const readAllDocIdsFromRootDoc =
serverNativeModule.readAllDocIdsFromRootDoc;
export const AFFINE_PRO_PUBLIC_KEY = serverNativeModule.AFFINE_PRO_PUBLIC_KEY;