diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md index 3159900617..dacd531226 100644 --- a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md +++ b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md @@ -89,3 +89,19 @@ Generated by [AVA](https://avajs.dev). > should not find docs to embed 0 + +## should filter outdated doc id style in embedding status + +> should include modern doc format + + { + embedded: 0, + total: 1, + } + +> should count docs after filtering outdated + + { + embedded: 1, + total: 1, + } diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap index c1eb1e4f5a..5f5bdbc4ee 100644 Binary files a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap and b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap differ diff --git a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts index 6adf124df1..5ddfc67e2d 100644 --- a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts +++ b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts @@ -306,3 +306,50 @@ test('should check embedding table', async t => { // t.false(ret, 'should return false when embedding table is not available'); // } }); + +test('should filter outdated doc id style in embedding status', async t => { + const docId = randomUUID(); + const outdatedDocId = `${workspace.id}:space:${docId}`; + + await t.context.doc.upsert({ + spaceId: workspace.id, + docId, + blob: Uint8Array.from([1, 2, 3]), + timestamp: Date.now(), + editorId: user.id, + }); + + await t.context.doc.upsert({ + spaceId: workspace.id, + docId: outdatedDocId, + blob: Uint8Array.from([1, 2, 3]), + timestamp: Date.now(), + editorId: user.id, + }); + + { + const status = await t.context.copilotWorkspace.getEmbeddingStatus( + workspace.id + ); + t.snapshot(status, 'should include modern doc format'); + } + + { + await t.context.copilotContext.insertWorkspaceEmbedding( + workspace.id, + docId, + [ + { + index: 0, + content: 'content', + embedding: Array.from({ length: 1024 }, () => 1), + }, + ] + ); + + const status = await t.context.copilotWorkspace.getEmbeddingStatus( + workspace.id + ); + t.snapshot(status, 'should count docs after filtering outdated'); + } +}); diff --git a/packages/backend/server/src/models/copilot-workspace.ts b/packages/backend/server/src/models/copilot-workspace.ts index 2a0fcbc4a4..651fa131e3 100644 --- a/packages/backend/server/src/models/copilot-workspace.ts +++ b/packages/backend/server/src/models/copilot-workspace.ts @@ -152,7 +152,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel { } @Transactional() - async getWorkspaceEmbeddingStatus(workspaceId: string) { + async getEmbeddingStatus(workspaceId: string) { const ignoredDocIds = (await this.listIgnoredDocIds(workspaceId)).map( d => d.docId ); @@ -168,9 +168,13 @@ export class CopilotWorkspaceConfigModel extends BaseModel { }; const [docTotal, docEmbedded, fileTotal, fileEmbedded] = await Promise.all([ - this.db.snapshot.count({ where: snapshotCondition }), - this.db.snapshot.count({ + this.db.snapshot.findMany({ + where: snapshotCondition, + select: { id: true }, + }), + this.db.snapshot.findMany({ where: { ...snapshotCondition, embedding: { some: {} } }, + select: { id: true }, }), this.db.aiWorkspaceFiles.count({ where: { workspaceId } }), this.db.aiWorkspaceFiles.count({ @@ -178,9 +182,23 @@ export class CopilotWorkspaceConfigModel extends BaseModel { }), ]); + const docTotalIds = docTotal.map(d => d.id); + const docTotalSet = new Set(docTotalIds); + const outdatedDocPrefix = `${workspaceId}:space:`; + const duplicateOutdatedDocSet = new Set( + docTotalIds + .filter(id => id.startsWith(outdatedDocPrefix)) + .filter(id => docTotalSet.has(id.slice(outdatedDocPrefix.length))) + ); + return { - total: docTotal + fileTotal, - embedded: docEmbedded + fileEmbedded, + total: + docTotalIds.filter(id => !duplicateOutdatedDocSet.has(id)).length + + fileTotal, + embedded: + docEmbedded + .map(d => d.id) + .filter(id => !duplicateOutdatedDocSet.has(id)).length + fileEmbedded, }; } diff --git a/packages/backend/server/src/plugins/copilot/context/resolver.ts b/packages/backend/server/src/plugins/copilot/context/resolver.ts index 484ecf8ba7..e235025177 100644 --- a/packages/backend/server/src/plugins/copilot/context/resolver.ts +++ b/packages/backend/server/src/plugins/copilot/context/resolver.ts @@ -356,6 +356,7 @@ export class CopilotContextRootResolver { return false; } + @Throttle('strict') @Query(() => ContextWorkspaceEmbeddingStatus, { description: 'query workspace embedding status', }) @@ -372,9 +373,7 @@ export class CopilotContextRootResolver { if (this.context.canEmbedding) { const { total, embedded } = - await this.models.copilotWorkspace.getWorkspaceEmbeddingStatus( - workspaceId - ); + await this.models.copilotWorkspace.getEmbeddingStatus(workspaceId); return { total, embedded }; } diff --git a/packages/backend/server/src/plugins/copilot/workspace/resolver.ts b/packages/backend/server/src/plugins/copilot/workspace/resolver.ts index a216ad947e..9d33c92ab9 100644 --- a/packages/backend/server/src/plugins/copilot/workspace/resolver.ts +++ b/packages/backend/server/src/plugins/copilot/workspace/resolver.ts @@ -103,6 +103,7 @@ export class CopilotWorkspaceEmbeddingConfigResolver { return ignoredDocs; } + @Mutation(() => Number, { name: 'updateWorkspaceEmbeddingIgnoredDocs', complexity: 2,