fix(server): exclude outdated doc id style in embedding count (#13269)

fix AI-392
fix AI-393

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
  * Improved filtering of outdated document ID styles in embedding status
    reporting, ensuring more accurate counts of embedded documents.
  * Stricter rate limiting applied to workspace embedding status queries
    for enhanced system reliability.

* **Bug Fixes**
  * Resolved issues with duplicate or outdated document IDs affecting
    embedding status totals.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-07-21 18:58:29 +08:00
committed by GitHub
parent b53b4884cf
commit c4cf5799d4
6 changed files with 89 additions and 8 deletions

View File

@@ -89,3 +89,19 @@ Generated by [AVA](https://avajs.dev).
> should not find docs to embed
0
## should filter outdated doc id style in embedding status
> should include modern doc format
{
embedded: 0,
total: 1,
}
> should count docs after filtering outdated
{
embedded: 1,
total: 1,
}

View File

@@ -306,3 +306,50 @@ test('should check embedding table', async t => {
// t.false(ret, 'should return false when embedding table is not available');
// }
});
/**
 * Upserts one document under its plain id and again under the
 * `<workspaceId>:space:<docId>` id style, then snapshots the workspace
 * embedding status before and after an embedding is inserted for the
 * plain id.
 */
test('should filter outdated doc id style in embedding status', async t => {
  const { doc, copilotWorkspace, copilotContext } = t.context;

  const plainDocId = randomUUID();
  const prefixedDocId = `${workspace.id}:space:${plainDocId}`;

  // Upsert two snapshots with identical content: first under the plain id,
  // then under the prefixed id.
  for (const id of [plainDocId, prefixedDocId]) {
    await doc.upsert({
      spaceId: workspace.id,
      docId: id,
      blob: Uint8Array.from([1, 2, 3]),
      timestamp: Date.now(),
      editorId: user.id,
    });
  }

  // Status before any embedding exists for either id.
  const statusBeforeEmbedding = await copilotWorkspace.getEmbeddingStatus(
    workspace.id
  );
  t.snapshot(statusBeforeEmbedding, 'should include modern doc format');

  // Attach a single 1024-dimension embedding chunk to the plain id only.
  const chunk = {
    index: 0,
    content: 'content',
    embedding: new Array<number>(1024).fill(1),
  };
  await copilotContext.insertWorkspaceEmbedding(workspace.id, plainDocId, [
    chunk,
  ]);

  const statusAfterEmbedding = await copilotWorkspace.getEmbeddingStatus(
    workspace.id
  );
  t.snapshot(statusAfterEmbedding, 'should count docs after filtering outdated');
});

View File

@@ -152,7 +152,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
}
@Transactional()
async getWorkspaceEmbeddingStatus(workspaceId: string) {
async getEmbeddingStatus(workspaceId: string) {
const ignoredDocIds = (await this.listIgnoredDocIds(workspaceId)).map(
d => d.docId
);
@@ -168,9 +168,13 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
};
const [docTotal, docEmbedded, fileTotal, fileEmbedded] = await Promise.all([
this.db.snapshot.count({ where: snapshotCondition }),
this.db.snapshot.count({
this.db.snapshot.findMany({
where: snapshotCondition,
select: { id: true },
}),
this.db.snapshot.findMany({
where: { ...snapshotCondition, embedding: { some: {} } },
select: { id: true },
}),
this.db.aiWorkspaceFiles.count({ where: { workspaceId } }),
this.db.aiWorkspaceFiles.count({
@@ -178,9 +182,23 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
}),
]);
const docTotalIds = docTotal.map(d => d.id);
const docTotalSet = new Set(docTotalIds);
const outdatedDocPrefix = `${workspaceId}:space:`;
const duplicateOutdatedDocSet = new Set(
docTotalIds
.filter(id => id.startsWith(outdatedDocPrefix))
.filter(id => docTotalSet.has(id.slice(outdatedDocPrefix.length)))
);
return {
total: docTotal + fileTotal,
embedded: docEmbedded + fileEmbedded,
total:
docTotalIds.filter(id => !duplicateOutdatedDocSet.has(id)).length +
fileTotal,
embedded:
docEmbedded
.map(d => d.id)
.filter(id => !duplicateOutdatedDocSet.has(id)).length + fileEmbedded,
};
}

View File

@@ -356,6 +356,7 @@ export class CopilotContextRootResolver {
return false;
}
@Throttle('strict')
@Query(() => ContextWorkspaceEmbeddingStatus, {
description: 'query workspace embedding status',
})
@@ -372,9 +373,7 @@ export class CopilotContextRootResolver {
if (this.context.canEmbedding) {
const { total, embedded } =
await this.models.copilotWorkspace.getWorkspaceEmbeddingStatus(
workspaceId
);
await this.models.copilotWorkspace.getEmbeddingStatus(workspaceId);
return { total, embedded };
}

View File

@@ -103,6 +103,7 @@ export class CopilotWorkspaceEmbeddingConfigResolver {
return ignoredDocs;
}
@Mutation(() => Number, {
name: 'updateWorkspaceEmbeddingIgnoredDocs',
complexity: 2,