fix(server): query workspace embed files (#11982)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
	- Expanded file chunk matching to include both context and workspace file embeddings, providing broader and more relevant search results.
- **Improvements**
	- Enhanced result ranking by introducing a re-ranking step for combined embedding matches, improving the relevance of returned file chunks.
	- Adjusted file count reporting to return the total number of workspace files instead of the number of ignored documents, so workspace file statistics are accurate.
	- Renamed and streamlined workspace file management methods for clearer and more consistent API usage.
- **Bug Fixes**
	- Embedding similarity queries are now skipped, returning no matches, when embedding is disabled for a workspace.
- **Tests**
	- Added comprehensive tests to verify workspace embedding management, including enabling, matching, and disabling embedding functionality.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-04-25 08:32:32 +00:00
parent 0abe65653b
commit 49c57ca649
13 changed files with 220 additions and 112 deletions

View File

@@ -153,7 +153,7 @@ export class CopilotContextModel extends BaseModel {
return Prisma.join(groups.map(row => Prisma.sql`(${Prisma.join(row)})`));
}
async insertContentEmbedding(
async insertFileEmbedding(
contextId: string,
fileId: string,
embeddings: Embedding[]
@@ -168,7 +168,7 @@ export class CopilotContextModel extends BaseModel {
`;
}
async matchContentEmbedding(
async matchFileEmbedding(
embedding: number[],
contextId: string,
topK: number,

View File

@@ -138,7 +138,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
}
@Transactional()
async addFileEmbeddings(
async insertFileEmbeddings(
workspaceId: string,
fileId: string,
embeddings: Embedding[]
@@ -151,7 +151,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
`;
}
async listWorkspaceFiles(
async listFiles(
workspaceId: string,
options?: {
includeRead?: boolean;
@@ -168,7 +168,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
return files;
}
async countWorkspaceFiles(workspaceId: string): Promise<number> {
async countFiles(workspaceId: string): Promise<number> {
const count = await this.db.aiWorkspaceFiles.count({
where: {
workspaceId,
@@ -177,12 +177,16 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
return count;
}
async matchWorkspaceFileEmbedding(
async matchFileEmbedding(
workspaceId: string,
embedding: number[],
topK: number,
threshold: number
): Promise<FileChunkSimilarity[]> {
if (!(await this.allowEmbedding(workspaceId))) {
return [];
}
const similarityChunks = await this.db.$queryRaw<
Array<FileChunkSimilarity>
>`
@@ -195,7 +199,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
return similarityChunks.filter(c => Number(c.distance) <= threshold);
}
async removeWorkspaceFile(workspaceId: string, fileId: string) {
async removeFile(workspaceId: string, fileId: string) {
// embeddings will be removed by foreign key constraint
await this.db.aiWorkspaceFiles.deleteMany({
where: {
@@ -205,4 +209,8 @@ export class CopilotWorkspaceConfigModel extends BaseModel {
});
return true;
}
/**
 * Thin wrapper that delegates to `models.workspace.allowEmbedding` for the
 * given workspace and returns that call's result unchanged.
 *
 * @param workspaceId - Identifier forwarded to the workspace model.
 */
private allowEmbedding(workspaceId: string) {
const { workspace } = this.models;
return workspace.allowEmbedding(workspaceId);
}
}