feat(infra): opti indexer performance (#8557)

This commit is contained in:
EYHN
2024-10-22 06:41:07 +00:00
parent d482e2f82e
commit 6ecdc8db7a
11 changed files with 264 additions and 217 deletions

View File

@@ -139,19 +139,7 @@ export class DocsIndexer extends Entity {
return;
}
const allIndexedDocs = (
await this.docIndex.search(
{
type: 'all',
},
{
pagination: {
limit: Number.MAX_SAFE_INTEGER,
skip: 0,
},
}
)
).nodes.map(n => n.id);
const allIndexedDocs = (await this.docIndex.getAll()).map(d => d.id);
workerOutput = await worker.run({
type: 'rootDoc',

View File

@@ -20,7 +20,10 @@ import type {
WorkerOutput,
} from './types';
let cachedRootDoc: { doc: YDoc; hash: string } | null = null;
const LRU_CACHE_SIZE = 5;
// lru cache for ydoc instances, last used at the end of the array
const lruCache = [] as { doc: YDoc; hash: string }[];
async function digest(data: Uint8Array) {
if (
@@ -35,6 +38,29 @@ async function digest(data: Uint8Array) {
return lib0Digest(data);
}
async function getOrCreateCachedYDoc(data: Uint8Array) {
try {
const hash = toHexString(await digest(data));
const cachedIndex = lruCache.findIndex(item => item.hash === hash);
if (cachedIndex !== -1) {
const cached = lruCache.splice(cachedIndex, 1)[0];
lruCache.push(cached);
return cached.doc;
} else {
const doc = new YDoc();
if (!isEmptyUpdate(data)) {
applyUpdate(doc, data);
}
lruCache.push({ doc, hash });
return doc;
}
} finally {
if (lruCache.length > LRU_CACHE_SIZE) {
lruCache.shift();
}
}
}
async function crawlingDocData({
docBuffer,
storageDocId,
@@ -45,16 +71,7 @@ async function crawlingDocData({
return {};
}
const rootDocBufferHash = toHexString(await digest(rootDocBuffer));
let yRootDoc;
if (cachedRootDoc && cachedRootDoc.hash === rootDocBufferHash) {
yRootDoc = cachedRootDoc.doc;
} else {
yRootDoc = new YDoc();
applyUpdate(yRootDoc, rootDocBuffer);
cachedRootDoc = { doc: yRootDoc, hash: rootDocBufferHash };
}
const yRootDoc = await getOrCreateCachedYDoc(rootDocBuffer);
let docId = null;
for (const [id, subdoc] of yRootDoc.getMap('spaces')) {
@@ -83,16 +100,18 @@ async function crawlingDocData({
deletedDoc: [docId],
};
} else {
const ydoc = new YDoc();
if (isEmptyUpdate(docBuffer)) {
return {
deletedDoc: [docId],
};
}
const ydoc = await getOrCreateCachedYDoc(docBuffer);
let docTitle = '';
let summaryLenNeeded = 1000;
let summary = '';
const blockDocuments: Document<BlockIndexSchema>[] = [];
if (!isEmptyUpdate(docBuffer)) {
applyUpdate(ydoc, docBuffer);
}
const blocks = ydoc.getMap<any>('blocks');
if (blocks.size === 0) {
@@ -363,16 +382,14 @@ async function crawlingDocData({
}
}
function crawlingRootDocData({
async function crawlingRootDocData({
allIndexedDocs,
rootDocBuffer,
reindexAll,
}: WorkerInput & {
type: 'rootDoc';
}): WorkerOutput {
const ydoc = new YDoc();
applyUpdate(ydoc, rootDocBuffer);
}): Promise<WorkerOutput> {
const ydoc = await getOrCreateCachedYDoc(rootDocBuffer);
const docs = ydoc.getMap('meta').get('pages') as
| YArray<YMap<any>>
@@ -422,7 +439,7 @@ globalThis.onmessage = async (event: MessageEvent<WorkerIngoingMessage>) => {
try {
let data;
if (input.type === 'rootDoc') {
data = crawlingRootDocData(input);
data = await crawlingRootDocData(input);
} else {
data = await crawlingDocData(input);
}