mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 21:27:20 +00:00
fix(server): improve outdated embedding cleanup (#13476)
<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Prevents accidental deletion of placeholder documents during embedding cleanup.
  * Improves accuracy when identifying documents to remove, using multiple data sources.
  * Skips unnecessary cleanup when no embeddings or snapshots exist, reducing noise and overhead.
* **Chores**
  * Streamlined and centralized document filtering logic to ensure consistent cleanup behavior.
  * Parallelized data checks to make cleanup more efficient without changing user workflows.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -532,11 +532,14 @@ export class CopilotEmbeddingJob {
|
||||
return;
|
||||
}
|
||||
|
||||
const docIdsInEmbedding =
|
||||
await this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId);
|
||||
if (!docIdsInEmbedding.length) {
|
||||
const [docIdsInEmbedding, docIdsInSnapshots] = await Promise.all([
|
||||
this.models.copilotContext.listWorkspaceDocEmbedding(workspaceId),
|
||||
this.models.copilotWorkspace.listEmbeddableDocIds(workspaceId),
|
||||
]);
|
||||
|
||||
if (!docIdsInEmbedding.length && !docIdsInSnapshots.length) {
|
||||
this.logger.verbose(
|
||||
`No doc embeddings found in workspace ${workspaceId}, skipping cleanup`
|
||||
`No doc embeddings and snapshots found in workspace ${workspaceId}, skipping cleanup`
|
||||
);
|
||||
await this.models.workspace.update(
|
||||
workspaceId,
|
||||
@@ -549,10 +552,17 @@ export class CopilotEmbeddingJob {
|
||||
const docIdsInWorkspace = readAllDocIdsFromWorkspaceSnapshot(snapshot.blob);
|
||||
const docIdsInWorkspaceSet = new Set(docIdsInWorkspace);
|
||||
|
||||
const deletedDocIds = docIdsInEmbedding.filter(
|
||||
docId => !docIdsInWorkspaceSet.has(docId)
|
||||
const deletedDocIds = new Set(
|
||||
[...docIdsInEmbedding, ...docIdsInSnapshots].filter(
|
||||
docId => !docIdsInWorkspaceSet.has(docId)
|
||||
)
|
||||
);
|
||||
for (const docId of deletedDocIds) {
|
||||
const isPlaceholder = await this.models.copilotWorkspace.hasPlaceholder(
|
||||
workspaceId,
|
||||
docId
|
||||
);
|
||||
if (isPlaceholder) continue;
|
||||
await this.models.copilotContext.deleteWorkspaceEmbedding(
|
||||
workspaceId,
|
||||
docId
|
||||
|
||||
Reference in New Issue
Block a user