feat(server): rerank for matching (#12039)

fix AI-20
fix AI-77

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Enhanced relevance-based re-ranking for embedding results, improving the accuracy of content suggestions.
  - Added prioritization for workspace content that matches specific document IDs in search results.
  - Introduced a new scoped threshold parameter to refine workspace document matching.

- **Improvements**
  - Increased default similarity threshold for file chunk matching, resulting in more precise matches.
  - Doubled candidate retrieval for file and workspace chunk matching to improve result quality.
  - Updated sorting to prioritize context-relevant documents in workspace matches.
  - Explicitly included original input content in re-ranking calls for better relevance assessment.

- **Bug Fixes**
  - Adjusted re-ranking logic to return only highly relevant results based on confidence scores.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-09 03:59:03 +00:00
parent c24fde7168
commit cb49ab0f69
7 changed files with 176 additions and 18 deletions

View File

@@ -2,11 +2,14 @@ import {
createOpenAI,
type OpenAIProvider as VercelOpenAIProvider,
} from '@ai-sdk/openai';
import { embedMany } from 'ai';
import { embedMany, generateObject } from 'ai';
import { chunk } from 'lodash-es';
import { Embedding } from '../../../models';
import { ChunkSimilarity, Embedding } from '../../../models';
import { OpenAIConfig } from '../providers/openai';
import { EmbeddingClient } from './types';
import { EmbeddingClient, getReRankSchema, ReRankResult } from './types';
// Model id handed to the OpenAI provider instance when scoring candidate
// chunks for re-ranking (see getEmbeddingRelevance).
const RERANK_MODEL = 'gpt-4.1-mini';
export class OpenAIEmbeddingClient extends EmbeddingClient {
readonly #instance: VercelOpenAIProvider;
@@ -35,6 +38,85 @@ export class OpenAIEmbeddingClient extends EmbeddingClient {
content: input[index],
}));
}
/**
 * Builds the text prompt used to ask the re-rank model for relevance scores.
 *
 * Every candidate is rendered as a `<result>` section holding its target id
 * (`docId` if present, otherwise `fileId`, otherwise an empty string), its
 * chunk value and its content. The sections are wrapped in `<results>` and
 * placed after an instruction sentence that embeds `query`.
 *
 * @param query - Topic the candidates are scored against.
 * @param embeddings - Candidate chunks to list in the prompt.
 * @returns The complete prompt string.
 */
private getRelevancePrompt<Chunk extends ChunkSimilarity = ChunkSimilarity>(
  query: string,
  embeddings: Chunk[]
) {
  // The tags are only a readable delimiter for the model; nothing here is
  // escaped or parsed as real XML.
  const renderedResults = embeddings
    .flatMap(item => {
      const ownerId =
        'docId' in item ? item.docId : 'fileId' in item ? item.fileId : '';
      return [
        '<result>',
        `<targetId>${ownerId}</targetId>`,
        `<chunk>${item.chunk}</chunk>`,
        `<content>${item.content}</content>`,
        '</result>',
      ];
    })
    .join('\n');

  const promptLines = [
    `Generate a score array based on the search results list to measure the likelihood that the information contained in the search results is useful for the report on the following topic: ${query}`,
    '',
    'Here are the search results:',
    '<results>',
    renderedResults,
    '</results>',
  ];
  return promptLines.join('\n');
}
/**
 * Asks the re-rank model to score one batch of candidate chunks.
 *
 * The prompt comes from `getRelevancePrompt`, and the structured response is
 * requested through `generateObject` with a schema from `getReRankSchema`,
 * which receives the batch length.
 *
 * @param query - Topic the candidates are scored against.
 * @param embeddings - Batch of candidate chunks to score.
 * @param signal - Optional abort signal forwarded to the model call.
 * @returns The `ranks` field of the generated object.
 */
private async getEmbeddingRelevance<
  Chunk extends ChunkSimilarity = ChunkSimilarity,
>(
  query: string,
  embeddings: Chunk[],
  signal?: AbortSignal
): Promise<ReRankResult> {
  const relevancePrompt = this.getRelevancePrompt(query, embeddings);
  const reranker = this.#instance(RERANK_MODEL);

  const response = await generateObject({
    model: reranker,
    prompt: relevancePrompt,
    schema: getReRankSchema(embeddings.length),
    maxRetries: 3,
    abortSignal: signal,
  });

  return response.object.ranks;
}
/**
 * Re-orders candidate chunks by model-assigned relevance and keeps the best.
 *
 * Steps:
 * 1. Sort candidates by ascending `distance`; a missing distance counts as
 *    `Infinity` and therefore sorts last.
 * 2. Index them by `"<targetId>:<chunk>"`. When two candidates share a key,
 *    the one later in the sorted order replaces the earlier one.
 * 3. Score the sorted candidates in batches of `min(topK, 10)`, one model
 *    call at a time.
 * 4. Order all returned scores from highest to lowest, keep those strictly
 *    above 5, and map each back to its candidate. Scores whose key matches
 *    no candidate are dropped.
 *
 * @param query - Topic the candidates are scored against.
 * @param embeddings - Candidate chunks; the input array is not mutated.
 * @param topK - Maximum number of chunks to return; also caps the batch size.
 * @param signal - Optional abort signal forwarded to every model call.
 * @returns At most `topK` candidates, highest score first.
 */
override async reRank<Chunk extends ChunkSimilarity = ChunkSimilarity>(
  query: string,
  embeddings: Chunk[],
  topK: number,
  signal?: AbortSignal
): Promise<Chunk[]> {
  const byDistance = embeddings.toSorted(
    (left, right) =>
      (left.distance ?? Infinity) - (right.distance ?? Infinity)
  );

  // Lookup table used to turn a model-returned (targetId, chunk) pair back
  // into the original candidate object.
  const candidatesByKey = new Map<string, Chunk>();
  for (const candidate of byDistance) {
    const ownerId =
      'docId' in candidate
        ? candidate.docId
        : 'fileId' in candidate
          ? candidate.fileId
          : '';
    candidatesByKey.set(`${ownerId}:${candidate.chunk}`, candidate);
  }

  // Batches are scored one after another rather than concurrently.
  const batchSize = Math.min(topK, 10);
  const batchResults: ReRankResult[] = [];
  for (const batch of chunk(byDistance, batchSize)) {
    batchResults.push(await this.getEmbeddingRelevance(query, batch, signal));
  }

  const orderedByScore = batchResults
    .flat()
    .toSorted((left, right) => right.scores.score - left.scores.score);

  const confident: Chunk[] = [];
  for (const { scores } of orderedByScore) {
    if (scores.score > 5) {
      const match = candidatesByKey.get(`${scores.targetId}:${scores.chunk}`);
      if (match) {
        confident.push(match);
      }
    }
  }

  return confident.slice(0, topK);
}
}
export class MockEmbeddingClient extends EmbeddingClient {