fix(server): missing embedding search (#13401)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
  * Enhanced search functionality to include results from additional
    "blob" data sources, providing more comprehensive search results.

* **Bug Fixes**
  * Improved messaging to ensure "No results found" is only shown when no
    relevant results exist across all data sources.

* **Tests**
  * Updated test cases to reflect new keyword contexts, improving
    validation accuracy for search-related features.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2025-08-04 16:19:59 +08:00
committed by GitHub
parent 76eedf3b76
commit c31504baaf
3 changed files with 33 additions and 8 deletions

View File

@@ -534,10 +534,16 @@ The term **“CRDT”** was first introduced by Marc Shapiro, Nuno Preguiça, Ca
'Chat With AFFiNE AI',
'Search With AFFiNE AI',
],
messages: [{ role: 'user' as const, content: TestAssets.AFFiNE }],
messages: [{ role: 'user' as const, content: TestAssets.SSOT }],
verifier: (t: ExecutionContext<Tester>, result: string) => {
assertNotWrappedInCodeBlock(t, result);
t.assert(result.includes('AFFiNE'), 'should include original keyword');
const cleared = result.toLowerCase();
t.assert(
cleared.includes('single source of truth') ||
/single.*source/.test(cleared) ||
cleared.includes('ssot'),
'should include original keyword'
);
},
type: 'text' as const,
},
@@ -595,13 +601,17 @@ The term **“CRDT”** was first introduced by Marc Shapiro, Nuno Preguiça, Ca
messages: [
{
role: 'user' as const,
content: TestAssets.AFFiNE,
content: TestAssets.SSOT,
params: { language: 'Simplified Chinese' },
},
],
verifier: (t: ExecutionContext<Tester>, result: string) => {
assertNotWrappedInCodeBlock(t, result);
t.assert(result.includes('AFFiNE'), 'should include keyword');
// Lowercase the model output once so the keyword check is case-insensitive.
const cleared = result.toLowerCase();
t.assert(
  // `cleared` is already lowercased, so the acronym must be matched as
  // 'ssot'; an uppercase 'SSOT' needle could never be found in it.
  cleared.includes('单一') || cleared.includes('ssot'),
  'explain code result should include keyword'
);
},
type: 'text' as const,
},
@@ -623,7 +633,7 @@ The term **“CRDT”** was first introduced by Marc Shapiro, Nuno Preguiça, Ca
content.includes('classroom') ||
content.includes('school') ||
content.includes('sky'),
'should include keyword'
'explain code result should include keyword'
);
},
type: 'text' as const,

View File

@@ -232,7 +232,7 @@ export class CopilotContextService implements OnApplicationBootstrap {
const embedding = await this.embeddingClient.getEmbedding(content, signal);
if (!embedding) return [];
const [fileChunks, workspaceChunks, scopedWorkspaceChunks] =
const [fileChunks, blobChunks, workspaceChunks, scopedWorkspaceChunks] =
await Promise.all([
this.models.copilotWorkspace.matchFileEmbedding(
workspaceId,
@@ -240,6 +240,12 @@ export class CopilotContextService implements OnApplicationBootstrap {
topK * 2,
threshold
),
this.models.copilotWorkspace.matchBlobEmbedding(
workspaceId,
embedding,
topK * 2,
threshold
),
this.models.copilotContext.matchWorkspaceEmbedding(
embedding,
workspaceId,
@@ -259,6 +265,7 @@ export class CopilotContextService implements OnApplicationBootstrap {
if (
!fileChunks.length &&
!blobChunks.length &&
!workspaceChunks.length &&
!scopedWorkspaceChunks?.length
) {
@@ -267,7 +274,12 @@ export class CopilotContextService implements OnApplicationBootstrap {
return await this.embeddingClient.reRank(
content,
[...fileChunks, ...workspaceChunks, ...(scopedWorkspaceChunks || [])],
[
...fileChunks,
...blobChunks,
...workspaceChunks,
...(scopedWorkspaceChunks || []),
],
topK,
signal
);

View File

@@ -42,12 +42,14 @@ export const buildDocSearchGetter = (
chunks.filter(c => 'docId' in c),
'Doc.Read'
);
const blobChunks = chunks.filter(c => 'blobId' in c);
const fileChunks = chunks.filter(c => 'fileId' in c);
if (contextChunks.length) {
fileChunks.push(...contextChunks);
}
if (!docChunks.length && !fileChunks.length)
if (!blobChunks.length && !docChunks.length && !fileChunks.length) {
return `No results found for "${query}".`;
}
const docIds = docChunks.map(c => ({
// oxlint-disable-next-line no-non-null-assertion
@@ -80,6 +82,7 @@ export const buildDocSearchGetter = (
return [
...fileChunks.map(clearEmbeddingChunk),
...blobChunks.map(clearEmbeddingChunk),
...docChunks.map(c => ({
...c,
...docMetas.get(c.docId),