feat: paged query for outdated embedding cleanup (#13335)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Refactor**
  * Improved the workspace cleanup process for trashed document embeddings
    to use a more efficient, incremental batching approach, resulting in
    better performance and reliability for large numbers of workspaces. No
    visible changes to the user interface or functionality.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-07-28 13:26:51 +08:00
committed by GitHub
parent 0e3691e54e
commit 627771948f

View File

@@ -1,14 +1,18 @@
import { Injectable, Logger } from '@nestjs/common';
import { Cron, CronExpression } from '@nestjs/schedule';
import { JobQueue, OneDay, OnJob } from '../../base';
import { JOB_SIGNAL, JobQueue, OneDay, OnJob } from '../../base';
import { Models } from '../../models';
// Passed as the second argument to `workspace.listAfterSid` in
// `cleanupTrashedDocEmbeddings`; presumably the number of workspaces fetched
// per run of that job — confirm against the model's signature.
const CLEANUP_EMBEDDING_JOB_BATCH_SIZE = 100;
// Global augmentation of the `Jobs` interface. Each key is a job name and its
// value is the payload type for that job; the handler for
// 'copilot.workspace.cleanupTrashedDocEmbeddings' types its `params` argument
// by indexing `Jobs` with that key.
declare global {
interface Jobs {
'copilot.session.cleanupEmptySessions': {};
'copilot.session.generateMissingTitles': {};
// NOTE(review): the next key is listed twice because this view is a diff
// without +/- markers; presumably the empty-payload line is the removed
// version and the `nextSid` form is the added one — confirm against the diff.
'copilot.workspace.cleanupTrashedDocEmbeddings': {};
'copilot.workspace.cleanupTrashedDocEmbeddings': {
// Optional cursor: the handler reads it (defaulting to 0) as the starting
// point for `listAfterSid` and overwrites it with the last `sid` of a batch.
nextSid?: number;
};
}
}
@@ -85,10 +89,17 @@ export class CopilotCronJobs {
}
/**
 * Handler for the 'copilot.workspace.cleanupTrashedDocEmbeddings' job.
 *
 * Fetches one batch of workspaces via `workspace.listAfterSid`, starting from
 * `params.nextSid` (0 when unset), and adds a
 * 'copilot.embedding.cleanupTrashedDocEmbeddings' job for each of them.
 *
 * @param params Job payload; `nextSid` is read as the batch start and is
 *   overwritten with the `sid` of the last workspace in the batch.
 * @returns `JOB_SIGNAL.Done` when the batch is empty, otherwise
 *   `JOB_SIGNAL.Repeat`.
 *
 * NOTE(review): this listing is a diff shown without +/- markers, so the
 * signature and the workspace lookup appear in both old and new forms, and
 * the `@@` line inside the loop is a hunk header rather than code.
 */
@OnJob('copilot.workspace.cleanupTrashedDocEmbeddings')
// NOTE(review): presumably the pre-change signature and unpaged
// `workspace.list` lookup (next four lines) — confirm against the diff.
async cleanupTrashedDocEmbeddings() {
const workspaces = await this.models.workspace.list(undefined, {
id: true,
});
// NOTE(review): presumably the post-change signature, taking the job payload.
async cleanupTrashedDocEmbeddings(
params: Jobs['copilot.workspace.cleanupTrashedDocEmbeddings']
) {
// Start from 0 when no cursor has been stored on the payload yet.
const nextSid = params.nextSid ?? 0;
// NOTE(review): `workspaces` is never reassigned in the visible code, so
// `const` would be sufficient here.
let workspaces = await this.models.workspace.listAfterSid(
nextSid,
CLEANUP_EMBEDDING_JOB_BATCH_SIZE
);
// Empty batch: nothing left to enqueue, so signal completion.
if (!workspaces.length) {
return JOB_SIGNAL.Done;
}
// Add one per-workspace embedding cleanup job for every workspace in the batch.
for (const { id: workspaceId } of workspaces) {
await this.jobs.add(
'copilot.embedding.cleanupTrashedDocEmbeddings',
@@ -96,5 +107,7 @@ export class CopilotCronJobs {
// The job id is derived from the workspace id; presumably this lets the queue
// de-duplicate repeated submissions for the same workspace — TODO confirm.
{ jobId: `cleanup-trashed-doc-embeddings-${workspaceId}` }
);
}
// Store the last `sid` of this batch on the payload before asking for a
// repeat; presumably the queue re-invokes the handler with the same payload
// so the next run continues after this batch — confirm JOB_SIGNAL semantics.
params.nextSid = workspaces[workspaces.length - 1].sid;
return JOB_SIGNAL.Repeat;
}
}