From 627771948f1e84e2620b9c7c38f5dafedb3cc543 Mon Sep 17 00:00:00 2001
From: DarkSky <25152247+darkskygit@users.noreply.github.com>
Date: Mon, 28 Jul 2025 13:26:51 +0800
Subject: [PATCH] feat: paged query for outdated embedding cleanup (#13335)

## Summary by CodeRabbit

* **Refactor**
  * Improved the workspace cleanup process for trashed document embeddings to use a more efficient, incremental batching approach, resulting in better performance and reliability for large numbers of workspaces. No visible changes to user interface or functionality.
---
 .../server/src/plugins/copilot/cron.ts | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/packages/backend/server/src/plugins/copilot/cron.ts b/packages/backend/server/src/plugins/copilot/cron.ts
index 3b0a4558a0..a694f8c97b 100644
--- a/packages/backend/server/src/plugins/copilot/cron.ts
+++ b/packages/backend/server/src/plugins/copilot/cron.ts
@@ -1,14 +1,18 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { Cron, CronExpression } from '@nestjs/schedule';
 
-import { JobQueue, OneDay, OnJob } from '../../base';
+import { JOB_SIGNAL, JobQueue, OneDay, OnJob } from '../../base';
 import { Models } from '../../models';
 
+const CLEANUP_EMBEDDING_JOB_BATCH_SIZE = 100;
+
 declare global {
   interface Jobs {
     'copilot.session.cleanupEmptySessions': {};
     'copilot.session.generateMissingTitles': {};
-    'copilot.workspace.cleanupTrashedDocEmbeddings': {};
+    'copilot.workspace.cleanupTrashedDocEmbeddings': {
+      nextSid?: number;
+    };
   }
 }
 
@@ -85,10 +89,17 @@ export class CopilotCronJobs {
   }
 
   @OnJob('copilot.workspace.cleanupTrashedDocEmbeddings')
-  async cleanupTrashedDocEmbeddings() {
-    const workspaces = await this.models.workspace.list(undefined, {
-      id: true,
-    });
+  async cleanupTrashedDocEmbeddings(
+    params: Jobs['copilot.workspace.cleanupTrashedDocEmbeddings']
+  ) {
+    const nextSid = params.nextSid ?? 0;
+    let workspaces = await this.models.workspace.listAfterSid(
+      nextSid,
+      CLEANUP_EMBEDDING_JOB_BATCH_SIZE
+    );
+    if (!workspaces.length) {
+      return JOB_SIGNAL.Done;
+    }
     for (const { id: workspaceId } of workspaces) {
       await this.jobs.add(
         'copilot.embedding.cleanupTrashedDocEmbeddings',
@@ -96,5 +107,7 @@ export class CopilotCronJobs {
         { jobId: `cleanup-trashed-doc-embeddings-${workspaceId}` }
       );
     }
+    params.nextSid = workspaces[workspaces.length - 1].sid;
+    return JOB_SIGNAL.Repeat;
   }
 }