fix(server): fulfill empty embedding for trashed docs (#13461)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- New Features
  - None
- Bug Fixes
  - Ensures a placeholder embedding is always created when content is
    empty or after deletion, reducing errors and improving Copilot
    stability.
- Refactor
  - Centralized empty-embedding handling for consistent behavior across
    workflows.
  - Standardized embedding dimension configuration to a single source for
    reliability.
- Chores
  - Simplified internal embedding module surface and imports for
    maintainability.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-08-11 11:23:45 +08:00
committed by GitHub
parent 07b9b4fb8d
commit 4ffa3b5ccc
5 changed files with 36 additions and 26 deletions

View File

@@ -19,6 +19,8 @@ import {
type UpdateCopilotContextInput = Pick<CopilotContext, 'config'>;
/**
 * Length of an embedding vector. The zero-filled placeholder embedding
 * is built with exactly this many entries.
 */
export const EMBEDDING_DIMENSIONS = 1024;
/**
* Copilot Job Model
*/
@@ -290,10 +292,24 @@ export class CopilotContextModel extends BaseModel {
`;
}
/**
 * Stores a single placeholder embedding for the given doc.
 *
 * The placeholder has index 0, empty content and an all-zero vector with
 * EMBEDDING_DIMENSIONS entries.
 *
 * @param workspaceId workspace the doc belongs to
 * @param docId doc that receives the placeholder embedding
 */
async fulfillEmptyEmbedding(workspaceId: string, docId: string) {
  const zeroVector = new Array<number>(EMBEDDING_DIMENSIONS).fill(0);
  const placeholder = { index: 0, content: '', embedding: zeroVector };
  const { copilotContext } = this.models;
  await copilotContext.insertWorkspaceEmbedding(workspaceId, docId, [
    placeholder,
  ]);
}
/**
 * Removes every stored embedding row of the doc in the workspace, then
 * writes a placeholder embedding in their place.
 *
 * @param workspaceId workspace the doc belongs to
 * @param docId doc whose embeddings are replaced by the placeholder
 */
async deleteWorkspaceEmbedding(workspaceId: string, docId: string) {
  const where = { workspaceId, docId };
  await this.db.aiWorkspaceEmbedding.deleteMany({ where });
  // Re-insert an empty embedding right after the deletion so the doc
  // still has an embedding entry.
  await this.fulfillEmptyEmbedding(workspaceId, docId);
}
async matchWorkspaceEmbedding(

View File

@@ -7,7 +7,11 @@ import {
CopilotProviderNotSupported,
} from '../../../base';
import { CopilotFailedToGenerateEmbedding } from '../../../base/error/errors.gen';
import { ChunkSimilarity, Embedding } from '../../../models';
import {
ChunkSimilarity,
Embedding,
EMBEDDING_DIMENSIONS,
} from '../../../models';
import { PromptService } from '../prompt';
import {
type CopilotProvider,
@@ -16,11 +20,7 @@ import {
ModelInputType,
ModelOutputType,
} from '../providers';
import {
EMBEDDING_DIMENSIONS,
EmbeddingClient,
type ReRankResult,
} from './types';
import { EmbeddingClient, type ReRankResult } from './types';
const EMBEDDING_MODEL = 'gemini-embedding-001';
const RERANK_PROMPT = 'Rerank results';

View File

@@ -1,4 +1,4 @@
export { getEmbeddingClient, MockEmbeddingClient } from './client';
export { CopilotEmbeddingJob } from './job';
export type { Chunk, DocFragment } from './types';
export { EMBEDDING_DIMENSIONS, EmbeddingClient } from './types';
export { EmbeddingClient } from './types';

View File

@@ -20,7 +20,7 @@ import { CopilotStorage } from '../storage';
import { readStream } from '../utils';
import { getEmbeddingClient } from './client';
import type { Chunk, DocFragment } from './types';
import { EMBEDDING_DIMENSIONS, EmbeddingClient } from './types';
import { EmbeddingClient } from './types';
@Injectable()
export class CopilotEmbeddingJob {
@@ -392,19 +392,6 @@ export class CopilotEmbeddingJob {
return controller.signal;
}
/**
 * Inserts one placeholder embedding (index 0, empty content, all-zero
 * vector of EMBEDDING_DIMENSIONS entries) for the given doc through the
 * copilot context model.
 */
private async fulfillEmptyEmbedding(workspaceId: string, docId: string) {
  const placeholders = [
    {
      index: 0,
      content: '',
      embedding: new Array<number>(EMBEDDING_DIMENSIONS).fill(0),
    },
  ];
  await this.models.copilotContext.insertWorkspaceEmbedding(
    workspaceId,
    docId,
    placeholders
  );
}
@OnJob('copilot.embedding.docs')
async embedPendingDocs({
contextId,
@@ -466,13 +453,19 @@ export class CopilotEmbeddingJob {
this.logger.warn(
`Doc ${docId} in workspace ${workspaceId} has no summary, fulfilling empty embedding.`
);
await this.fulfillEmptyEmbedding(workspaceId, docId);
await this.models.copilotContext.fulfillEmptyEmbedding(
workspaceId,
docId
);
}
} else {
this.logger.warn(
`Doc ${docId} in workspace ${workspaceId} has no fragment, fulfilling empty embedding.`
);
await this.fulfillEmptyEmbedding(workspaceId, docId);
await this.models.copilotContext.fulfillEmptyEmbedding(
workspaceId,
docId
);
}
}
} catch (error: any) {
@@ -490,7 +483,10 @@ export class CopilotEmbeddingJob {
`Doc ${docId} in workspace ${workspaceId} has no content, fulfilling empty embedding.`
);
// if the doc is empty, we still need to fulfill the embedding
await this.fulfillEmptyEmbedding(workspaceId, docId);
await this.models.copilotContext.fulfillEmptyEmbedding(
workspaceId,
docId
);
return;
}

View File

@@ -98,8 +98,6 @@ export type Chunk = {
content: string;
};
export const EMBEDDING_DIMENSIONS = 1024;
export abstract class EmbeddingClient {
async configured() {
return true;