feat(server): improve context metadata & matching (#12064)

fix AI-20

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Enhanced file metadata with MIME type, blob ID, and file name across context and workspace, now visible in UI and API.
  - Added workspace-level matching for files and documents with configurable thresholds and workspace scoping in search queries.
  - Introduced a new error type and user-friendly messaging for global workspace context matching failures.

- **Bug Fixes**
  - Made the handling of file MIME types and nullable context IDs more consistent so that metadata is reported accurately.

- **Documentation**
  - Updated GraphQL schema, queries, and mutations to include new metadata fields, optional parameters, and error types.

- **Style**
  - Added new localization strings for global context matching error messages.

- **Tests**
  - Extended test coverage with new and updated snapshot tests for metadata and matching logic.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
darkskygit
2025-05-14 06:32:29 +00:00
parent 04c5fd6dfc
commit cecf545590
36 changed files with 465 additions and 108 deletions

View File

@@ -23,6 +23,7 @@ import {
CallMetric,
CopilotEmbeddingUnavailable,
CopilotFailedToMatchContext,
CopilotFailedToMatchGlobalContext,
CopilotFailedToModifyContext,
CopilotSessionNotFound,
EventBus,
@@ -117,8 +118,8 @@ class RemoveContextFileInput {
@ObjectType('CopilotContext')
export class CopilotContextType {
@Field(() => ID)
id!: string;
@Field(() => ID, { nullable: true })
id!: string | undefined;
@Field(() => String)
workspaceId!: string;
@@ -169,6 +170,9 @@ class CopilotContextFile implements ContextFile {
@Field(() => String)
name!: string;
@Field(() => String)
mimeType!: string;
@Field(() => SafeIntResolver)
chunkSize!: number;
@@ -190,6 +194,15 @@ class ContextMatchedFileChunk implements FileChunkSimilarity {
@Field(() => String)
fileId!: string;
@Field(() => String)
blobId!: string;
@Field(() => String)
name!: string;
@Field(() => String)
mimeType!: string;
@Field(() => SafeIntResolver)
chunk!: number;
@@ -283,6 +296,15 @@ export class CopilotContextRootResolver {
}
}
if (copilot.workspaceId) {
return [
{
id: undefined,
workspaceId: copilot.workspaceId,
},
];
}
return [];
}
@@ -387,6 +409,9 @@ export class CopilotContextResolver {
async collections(
@Parent() context: CopilotContextType
): Promise<CopilotContextCategory[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
const collections = session.collections;
await this.models.copilotContext.mergeDocStatus(
@@ -404,6 +429,9 @@ export class CopilotContextResolver {
async tags(
@Parent() context: CopilotContextType
): Promise<CopilotContextCategory[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
const tags = session.tags;
await this.models.copilotContext.mergeDocStatus(
@@ -419,6 +447,9 @@ export class CopilotContextResolver {
})
@CallMetric('ai', 'context_file_list')
async docs(@Parent() context: CopilotContextType): Promise<CopilotDocType[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
const docs = session.docs;
await this.models.copilotContext.mergeDocStatus(session.workspaceId, docs);
@@ -433,6 +464,9 @@ export class CopilotContextResolver {
async files(
@Parent() context: CopilotContextType
): Promise<CopilotContextFile[]> {
if (!context.id) {
return [];
}
const session = await this.context.get(context.id);
return session.files;
}
@@ -593,7 +627,11 @@ export class CopilotContextResolver {
const session = await this.context.get(options.contextId);
try {
const file = await session.addFile(options.blobId, content.filename);
const file = await session.addFile(
options.blobId,
content.filename,
content.mimetype
);
const buffer = await readStream(content.createReadStream());
await this.storage.put(
@@ -664,6 +702,8 @@ export class CopilotContextResolver {
@Args('content') content: string,
@Args('limit', { type: () => SafeIntResolver, nullable: true })
limit?: number,
@Args('scopedThreshold', { type: () => Float, nullable: true })
scopedThreshold?: number,
@Args('threshold', { type: () => Float, nullable: true })
threshold?: number
): Promise<ContextMatchedFileChunk[]> {
@@ -671,22 +711,46 @@ export class CopilotContextResolver {
return [];
}
const session = await this.context.get(context.id);
try {
return await session.matchFileChunks(
if (!context.id) {
return await this.context.matchWorkspaceFiles(
context.workspaceId,
content,
limit,
this.getSignal(ctx.req),
threshold
);
}
const session = await this.context.get(context.id);
return await session.matchFiles(
content,
limit,
this.getSignal(ctx.req),
scopedThreshold,
threshold
);
} catch (e: any) {
throw new CopilotFailedToMatchContext({
contextId: context.id,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
// passthrough user friendly error
if (e instanceof UserFriendlyError) {
throw e;
}
if (context.id) {
throw new CopilotFailedToMatchContext({
contextId: context.id,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
} else {
throw new CopilotFailedToMatchGlobalContext({
workspaceId: context.workspaceId,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
}
}
}
@@ -711,20 +775,38 @@ export class CopilotContextResolver {
}
try {
const session = await this.context.get(context.id);
await this.ac
.user(user.id)
.workspace(session.workspaceId)
.workspace(context.workspaceId)
.allowLocal()
.assert('Workspace.Copilot');
const allowEmbedding = await this.models.workspace.allowEmbedding(
session.workspaceId
context.workspaceId
);
if (!allowEmbedding) {
return [];
}
const chunks = await session.matchWorkspaceChunks(
if (!context.id) {
return await this.context.matchWorkspaceDocs(
context.workspaceId,
content,
limit,
this.getSignal(ctx.req),
threshold
);
}
const session = await this.context.get(context.id);
if (session.workspaceId !== context.workspaceId) {
throw new CopilotFailedToMatchContext({
contextId: context.id,
// don't record the large content
content: content.slice(0, 512),
message: 'context not in the same workspace',
});
}
const chunks = await session.matchWorkspaceDocs(
content,
limit,
this.getSignal(ctx.req),
@@ -748,12 +830,22 @@ export class CopilotContextResolver {
if (e instanceof UserFriendlyError) {
throw e;
}
throw new CopilotFailedToMatchContext({
contextId: context.id,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
if (context.id) {
throw new CopilotFailedToMatchContext({
contextId: context.id,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
} else {
throw new CopilotFailedToMatchGlobalContext({
workspaceId: context.workspaceId,
// don't record the large content
content: content.slice(0, 512),
message: e.message,
});
}
}
}
}

View File

@@ -148,6 +148,48 @@ export class CopilotContextService implements OnApplicationBootstrap {
return null;
}
/**
 * Match workspace-level file embeddings against a piece of text.
 *
 * Resolves to an empty list when no embedding client is configured or when
 * no embedding could be produced for `content`.
 *
 * @param workspaceId workspace whose file embeddings are searched
 * @param content text to embed and match against
 * @param topK number of results kept after re-ranking, default 5
 * @param signal optional abort signal, forwarded to the embedding and re-rank calls
 * @param threshold threshold forwarded to the embedding lookup, default 0.5
 * @returns the re-ranked candidates, at most `topK` as requested from `reRank`
 */
async matchWorkspaceFiles(
  workspaceId: string,
  content: string,
  topK: number = 5,
  signal?: AbortSignal,
  threshold: number = 0.5
) {
  if (!this.embeddingClient) {
    return [];
  }

  const queryEmbedding = await this.embeddingClient.getEmbedding(
    content,
    signal
  );
  if (!queryEmbedding) {
    return [];
  }

  // fetch twice as many candidates as requested so the re-ranker has room to choose
  const candidateLimit = topK * 2;
  const candidates = await this.models.copilotWorkspace.matchFileEmbedding(
    workspaceId,
    queryEmbedding,
    candidateLimit,
    threshold
  );

  return this.embeddingClient.reRank(content, candidates, topK, signal);
}
/**
 * Match workspace-level doc embeddings against a piece of text.
 *
 * Resolves to an empty list when no embedding client is configured or when
 * no embedding could be produced for `content`.
 *
 * @param workspaceId workspace whose doc embeddings are searched
 * @param content text to embed and match against
 * @param topK number of results kept after re-ranking, default 5
 * @param signal optional abort signal, forwarded to the embedding and re-rank calls
 * @param threshold threshold forwarded to the embedding lookup, default 0.5
 * @returns the re-ranked candidates, at most `topK` as requested from `reRank`
 */
async matchWorkspaceDocs(
  workspaceId: string,
  content: string,
  topK: number = 5,
  signal?: AbortSignal,
  threshold: number = 0.5
) {
  if (!this.embeddingClient) return [];
  const embedding = await this.embeddingClient.getEmbedding(content, signal);
  if (!embedding) return [];

  // fetch twice as many candidates as requested so the re-ranker has room to choose
  const workspace = await this.models.copilotContext.matchWorkspaceEmbedding(
    embedding,
    workspaceId,
    topK * 2,
    threshold
  );
  // pass the abort signal through so a cancelled request also stops re-ranking,
  // matching what matchWorkspaceFiles does
  return this.embeddingClient.reRank(content, workspace, topK, signal);
}
@OnEvent('workspace.doc.embed.failed')
async onDocEmbedFailed({
contextId,

View File

@@ -52,12 +52,16 @@ export class ContextSession implements AsyncDisposable {
}
get files() {
return this.config.files.map(f => ({ ...f }));
return this.config.files.map(f => this.fulfillFile(f));
}
get docIds() {
return Array.from(
new Set([this.config.docs, this.config.categories].flat().map(d => d.id))
new Set(
[this.config.docs, this.config.categories.flatMap(c => c.docs)]
.flat()
.map(d => d.id)
)
);
}
@@ -136,14 +140,25 @@ export class ContextSession implements AsyncDisposable {
return true;
}
async addFile(blobId: string, name: string): Promise<ContextFile> {
/**
 * Build a shallow copy of `file` in which `mimeType` is always populated.
 *
 * A falsy `mimeType` (absent or empty) is replaced with
 * `application/octet-stream`.
 */
private fulfillFile(file: ContextFile): Required<ContextFile> {
  const mimeType = file.mimeType || 'application/octet-stream';
  return { ...file, mimeType };
}
async addFile(
blobId: string,
name: string,
mimeType: string
): Promise<Required<ContextFile>> {
let fileId = nanoid();
const existsBlob = this.config.files.find(f => f.blobId === blobId);
if (existsBlob) {
// use exists file id if the blob exists
// we assume that the file content pointed to by the same blobId is consistent.
if (existsBlob.status === ContextEmbedStatus.finished) {
return existsBlob;
return this.fulfillFile(existsBlob);
}
fileId = existsBlob.id;
} else {
@@ -152,11 +167,12 @@ export class ContextSession implements AsyncDisposable {
blobId,
chunkSize: 0,
name,
mimeType,
error: null,
createdAt: Date.now(),
}));
}
return this.getFile(fileId) as ContextFile;
return this.fulfillFile(this.getFile(fileId) as ContextFile);
}
getFile(fileId: string): ContextFile | undefined {
@@ -181,15 +197,14 @@ export class ContextSession implements AsyncDisposable {
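* @param threshold relevance threshold for the similarity score, higher threshold means more similar chunks, default 0.5
* (the previously documented default of 0.7 was described as good enough based on prior experiments; the signature now defaults to 0.5, and to 0.85 for scopedThreshold, which is used for the context-scoped match)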
* @returns list of similar chunks
*/
async matchFileChunks(
async matchFiles(
content: string,
topK: number = 5,
signal?: AbortSignal,
threshold: number = 0.85
scopedThreshold: number = 0.85,
threshold: number = 0.5
): Promise<FileChunkSimilarity[]> {
const embedding = await this.client
.getEmbeddings([content], signal)
.then(r => r?.[0]?.embedding);
const embedding = await this.client.getEmbedding(content, signal);
if (!embedding) return [];
const [context, workspace] = await Promise.all([
@@ -197,7 +212,7 @@ export class ContextSession implements AsyncDisposable {
embedding,
this.id,
topK * 2,
threshold
scopedThreshold
),
this.models.copilotWorkspace.matchFileEmbedding(
this.workspaceId,
@@ -206,10 +221,21 @@ export class ContextSession implements AsyncDisposable {
threshold
),
]);
const files = new Map(this.files.map(f => [f.id, f]));
return this.client.reRank(
content,
[...context, ...workspace],
[
...context
.filter(f => files.has(f.fileId))
.map(c => {
const { blobId, name, mimeType } = files.get(
c.fileId
) as Required<ContextFile>;
return { ...c, blobId, name, mimeType };
}),
...workspace,
],
topK,
signal
);
@@ -223,16 +249,14 @@ export class ContextSession implements AsyncDisposable {
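* @param threshold relevance threshold for the similarity score, higher threshold means more similar chunks, default 0.5
* (the previously documented default of 0.7 was described as good enough based on prior experiments; the signature now defaults to 0.5, and to 0.85 for scopedThreshold)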
* @returns list of similar chunks
*/
async matchWorkspaceChunks(
async matchWorkspaceDocs(
content: string,
topK: number = 5,
signal?: AbortSignal,
scopedThreshold: number = 0.5,
threshold: number = 0.85
scopedThreshold: number = 0.85,
threshold: number = 0.5
) {
const embedding = await this.client
.getEmbeddings([content], signal)
.then(r => r?.[0]?.embedding);
const embedding = await this.client.getEmbedding(content, signal);
if (!embedding) return [];
const docIds = this.docIds;

View File

@@ -128,6 +128,11 @@ export abstract class EmbeddingClient {
.slice(0, topK);
}
/**
 * Embed a single query string.
 *
 * Delegates to `getEmbeddings` with a one-element batch and returns the
 * `embedding` of the first result, or `undefined` when there is none.
 *
 * @param query text to embed
 * @param signal optional abort signal, forwarded to `getEmbeddings`
 */
async getEmbedding(query: string, signal?: AbortSignal) {
  const results = await this.getEmbeddings([query], signal);
  const first = results?.[0];
  return first?.embedding;
}
abstract getEmbeddings(
input: string[],
signal?: AbortSignal

View File

@@ -60,6 +60,7 @@ export class CopilotWorkspaceService implements OnApplicationBootstrap {
await this.storage.put(userId, workspaceId, blobId, buffer);
const file = await this.models.copilotWorkspace.addFile(workspaceId, {
fileName,
blobId,
mimeType: content.mimetype,
size: buffer.length,
});

View File

@@ -55,6 +55,9 @@ export class CopilotWorkspaceFileType implements CopilotWorkspaceFile {
@Field(() => String)
fileId!: string;
@Field(() => String)
blobId!: string;
@Field(() => String)
fileName!: string;