mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-24 01:42:55 +08:00
feat(server): rerank for matching (#12039)
fix AI-20 fix AI-77 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Enhanced relevance-based re-ranking for embedding results, improving the accuracy of content suggestions. - Added prioritization for workspace content that matches specific document IDs in search results. - Introduced a new scoped threshold parameter to refine workspace document matching. - **Improvements** - Increased default similarity threshold for file chunk matching, resulting in more precise matches. - Doubled candidate retrieval for file and workspace chunk matching to improve result quality. - Updated sorting to prioritize context-relevant documents in workspace matches. - Explicitly included original input content in re-ranking calls for better relevance assessment. - **Bug Fixes** - Adjusted re-ranking logic to return only highly relevant results based on confidence scores. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -55,6 +55,12 @@ export class ContextSession implements AsyncDisposable {
|
||||
return this.config.files.map(f => ({ ...f }));
|
||||
}
|
||||
|
||||
get docIds() {
|
||||
return Array.from(
|
||||
new Set([this.config.docs, this.config.categories].flat().map(d => d.id))
|
||||
);
|
||||
}
|
||||
|
||||
get sortedList(): ContextList {
|
||||
const { docs, files } = this.config;
|
||||
return [...docs, ...files].toSorted(
|
||||
@@ -176,7 +182,7 @@ export class ContextSession implements AsyncDisposable {
|
||||
content: string,
|
||||
topK: number = 5,
|
||||
signal?: AbortSignal,
|
||||
threshold: number = 0.7
|
||||
threshold: number = 0.85
|
||||
): Promise<FileChunkSimilarity[]> {
|
||||
const embedding = await this.client
|
||||
.getEmbeddings([content], signal)
|
||||
@@ -187,18 +193,23 @@ export class ContextSession implements AsyncDisposable {
|
||||
this.models.copilotContext.matchFileEmbedding(
|
||||
embedding,
|
||||
this.id,
|
||||
topK,
|
||||
topK * 2,
|
||||
threshold
|
||||
),
|
||||
this.models.copilotWorkspace.matchFileEmbedding(
|
||||
this.workspaceId,
|
||||
embedding,
|
||||
topK,
|
||||
topK * 2,
|
||||
threshold
|
||||
),
|
||||
]);
|
||||
|
||||
return this.client.reRank([...context, ...workspace]);
|
||||
return this.client.reRank(
|
||||
content,
|
||||
[...context, ...workspace],
|
||||
topK,
|
||||
signal
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -213,18 +224,44 @@ export class ContextSession implements AsyncDisposable {
|
||||
content: string,
|
||||
topK: number = 5,
|
||||
signal?: AbortSignal,
|
||||
threshold: number = 0.7
|
||||
scopedThreshold: number = 0.5,
|
||||
threshold: number = 0.85
|
||||
) {
|
||||
const embedding = await this.client
|
||||
.getEmbeddings([content], signal)
|
||||
.then(r => r?.[0]?.embedding);
|
||||
if (!embedding) return [];
|
||||
|
||||
return this.models.copilotContext.matchWorkspaceEmbedding(
|
||||
embedding,
|
||||
this.workspaceId,
|
||||
const docIds = this.docIds;
|
||||
const [inContext, workspace] = await Promise.all([
|
||||
this.models.copilotContext.matchWorkspaceEmbedding(
|
||||
embedding,
|
||||
this.workspaceId,
|
||||
topK * 2,
|
||||
scopedThreshold,
|
||||
docIds
|
||||
),
|
||||
this.models.copilotContext.matchWorkspaceEmbedding(
|
||||
embedding,
|
||||
this.workspaceId,
|
||||
topK * 2,
|
||||
threshold
|
||||
),
|
||||
]);
|
||||
|
||||
const result = await this.client.reRank(
|
||||
content,
|
||||
[...inContext, ...workspace],
|
||||
topK,
|
||||
threshold
|
||||
signal
|
||||
);
|
||||
|
||||
// sort result, doc recorded in context first
|
||||
const docIdSet = new Set(docIds);
|
||||
return result.toSorted(
|
||||
(a, b) =>
|
||||
(docIdSet.has(a.docId) ? -1 : 1) - (docIdSet.has(b.docId) ? -1 : 1) ||
|
||||
(a.distance || Infinity) - (b.distance || Infinity)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user