diff --git a/packages/backend/server/migrations/20250429113337_workspace_file_blob_id/migration.sql b/packages/backend/server/migrations/20250429113337_workspace_file_blob_id/migration.sql new file mode 100644 index 0000000000..fe6de86cca --- /dev/null +++ b/packages/backend/server/migrations/20250429113337_workspace_file_blob_id/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "ai_workspace_files" ADD COLUMN "blob_id" VARCHAR NOT NULL DEFAULT ''; diff --git a/packages/backend/server/schema.prisma b/packages/backend/server/schema.prisma index 80560c7bab..166f28941d 100644 --- a/packages/backend/server/schema.prisma +++ b/packages/backend/server/schema.prisma @@ -518,6 +518,7 @@ model AiWorkspaceIgnoredDocs { model AiWorkspaceFiles { workspaceId String @map("workspace_id") @db.VarChar fileId String @map("file_id") @db.VarChar + blobId String @map("blob_id") @db.VarChar @default("") fileName String @map("file_name") @db.VarChar mimeType String @map("mime_type") @db.VarChar size Int @db.Integer diff --git a/packages/backend/server/src/__tests__/copilot.spec.ts b/packages/backend/server/src/__tests__/copilot.spec.ts index e4eb482420..f81ea02941 100644 --- a/packages/backend/server/src/__tests__/copilot.spec.ts +++ b/packages/backend/server/src/__tests__/copilot.spec.ts @@ -1316,7 +1316,11 @@ test('should be able to manage context', async t => { // file record { await storage.put(userId, session.workspaceId, 'blob', buffer); - const file = await session.addFile('blob', 'sample.pdf'); + const file = await session.addFile( + 'blob', + 'sample.pdf', + 'application/pdf' + ); const handler = Sinon.spy(event, 'emit'); @@ -1345,7 +1349,7 @@ test('should be able to manage context', async t => { 'should list file id' ); - const result = await session.matchFileChunks('test', 1, undefined, 1); + const result = await session.matchFiles('test', 1, undefined, 1); t.is(result.length, 1, 'should match context'); t.is(result[0].fileId, file.id, 'should match file id'); } @@ -1499,13 +1503,13 @@ test('should be able to manage workspace embedding', async t => { }); const contextSession = await context.create(sessionId); - const ret = await contextSession.matchFileChunks('test', 1, undefined, 1); + const ret = await contextSession.matchFiles('test', 1, undefined, 1); t.is(ret.length, 1, 'should match workspace context'); t.is(ret[0].content, 'content', 'should match content'); await workspace.update(ws.id, { enableDocEmbedding: false }); - const ret2 = await contextSession.matchFileChunks('test', 1, undefined, 1); + const ret2 = await contextSession.matchFiles('test', 1, undefined, 1); t.is(ret2.length, 0, 'should not match workspace context'); } }); diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.md b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.md index b4d85cc168..eacff8318c 100644 --- a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.md +++ b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.md @@ -6,6 +6,17 @@ Generated by [AVA](https://avajs.dev). ## should insert embedding by doc id +> should match file embedding + + [ + { + chunk: 0, + content: 'content', + distance: 0, + fileId: 'file-id', + }, + ] + > should return empty array when embedding deleted [] diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap index d76cf43ae7..6a59b1c540 100644 Binary files a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap and b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-context.spec.ts.snap differ diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md index 4f430d9391..211f3b9a2f 100644 --- a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md +++ b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.md @@ -54,3 +54,18 @@ Generated by [AVA](https://avajs.dev). docId: 'new_doc', }, ] + +## should insert and search embedding + +> should match workspace file embedding + + [ + { + blobId: 'blob1', + chunk: 0, + content: 'content', + distance: 0, + mimeType: 'text/plain', + name: 'file1', + }, + ] diff --git a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap index c08b0d8c52..8447bb15c5 100644 Binary files a/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap and b/packages/backend/server/src/__tests__/models/__snapshots__/copilot-workspace.spec.ts.snap differ diff --git a/packages/backend/server/src/__tests__/models/copilot-context.spec.ts b/packages/backend/server/src/__tests__/models/copilot-context.spec.ts index d6a014f892..fedc7f29a4 100644 --- a/packages/backend/server/src/__tests__/models/copilot-context.spec.ts +++ b/packages/backend/server/src/__tests__/models/copilot-context.spec.ts @@ -111,8 +111,7 @@ test('should insert embedding by doc id', async t => { 1, 1 ); - t.is(ret.length, 1); - t.is(ret[0].content, 'content'); + t.snapshot(ret, 'should match file embedding'); } { diff --git a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts index 6ece7f5d8e..6e1da27d34 100644 --- a/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts +++ b/packages/backend/server/src/__tests__/models/copilot-workspace.spec.ts @@ -107,6 +107,7 @@ test('should insert and search embedding', async t => { { const { fileId } = await t.context.copilotWorkspace.addFile(workspace.id, { fileName: 'file1', + blobId: 'blob1', mimeType: 'text/plain', size: 1, }); @@ -129,8 +130,10 @@ test('should insert and search embedding', async t => { 1, 1 ); - t.is(ret.length, 1); - t.is(ret[0].content, 'content'); + t.snapshot( + cleanObject(ret, ['fileId']), + 'should match workspace file embedding' + ); } } }); diff --git a/packages/backend/server/src/base/error/def.ts b/packages/backend/server/src/base/error/def.ts index bc29419801..e4f90f2bf9 100644 --- a/packages/backend/server/src/base/error/def.ts +++ b/packages/backend/server/src/base/error/def.ts @@ -714,6 +714,12 @@ export const USER_FRIENDLY_ERRORS = { message: ({ contextId, content, message }) => `Failed to match context ${contextId} with "${escape(content)}": ${message}`, }, + copilot_failed_to_match_global_context: { + type: 'internal_server_error', + args: { workspaceId: 'string', content: 'string', message: 'string' }, + message: ({ workspaceId, content, message }) => + `Failed to match context in workspace ${workspaceId} with "${escape(content)}": ${message}`, + }, copilot_embedding_disabled: { type: 'action_forbidden', message: `Embedding feature is disabled, please contact the administrator to enable it in the workspace settings.`, diff --git a/packages/backend/server/src/base/error/errors.gen.ts b/packages/backend/server/src/base/error/errors.gen.ts index 657f4dee74..2cf2cc099a 100644 --- a/packages/backend/server/src/base/error/errors.gen.ts +++ b/packages/backend/server/src/base/error/errors.gen.ts @@ -780,6 +780,18 @@ export class CopilotFailedToMatchContext extends UserFriendlyError { super('internal_server_error', 'copilot_failed_to_match_context', message, args); } } +@ObjectType() +class CopilotFailedToMatchGlobalContextDataType { + @Field() workspaceId!: string + @Field() content!: string + @Field() message!: string +} + +export class CopilotFailedToMatchGlobalContext extends UserFriendlyError { + constructor(args: CopilotFailedToMatchGlobalContextDataType, message?: string | ((args: CopilotFailedToMatchGlobalContextDataType) => string)) { + super('internal_server_error', 'copilot_failed_to_match_global_context', message, args); + } +} export class CopilotEmbeddingDisabled extends UserFriendlyError { constructor(message?: string) { @@ -1078,6 +1090,7 @@ export enum ErrorNames { COPILOT_CONTEXT_FILE_NOT_SUPPORTED, COPILOT_FAILED_TO_MODIFY_CONTEXT, COPILOT_FAILED_TO_MATCH_CONTEXT, + COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT, COPILOT_EMBEDDING_DISABLED, COPILOT_EMBEDDING_UNAVAILABLE, COPILOT_TRANSCRIPTION_JOB_EXISTS, @@ -1114,5 +1127,5 @@ registerEnumType(ErrorNames, { export const ErrorDataUnionType = createUnionType({ name: 'ErrorDataUnion', types: () => - [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType] as const, + [GraphqlBadRequestDataType, HttpRequestErrorDataType, QueryTooLongDataType, ValidationErrorDataType, WrongSignInCredentialsDataType, UnknownOauthProviderDataType, InvalidOauthCallbackCodeDataType, MissingOauthQueryParameterDataType, InvalidEmailDataType, InvalidPasswordLengthDataType, WorkspacePermissionNotFoundDataType, SpaceNotFoundDataType, MemberNotFoundInSpaceDataType, NotInSpaceDataType, AlreadyInSpaceDataType, SpaceAccessDeniedDataType, SpaceOwnerNotFoundDataType, SpaceShouldHaveOnlyOneOwnerDataType, DocNotFoundDataType, DocActionDeniedDataType, DocUpdateBlockedDataType, VersionRejectedDataType, InvalidHistoryTimestampDataType, DocHistoryNotFoundDataType, BlobNotFoundDataType, ExpectToGrantDocUserRolesDataType, ExpectToRevokeDocUserRolesDataType, ExpectToUpdateDocUserRoleDataType, NoMoreSeatDataType, UnsupportedSubscriptionPlanDataType, SubscriptionAlreadyExistsDataType, SubscriptionNotExistsDataType, SameSubscriptionRecurringDataType, SubscriptionPlanNotFoundDataType, CopilotDocNotFoundDataType, CopilotMessageNotFoundDataType, CopilotPromptNotFoundDataType, CopilotProviderSideErrorDataType, CopilotInvalidContextDataType, CopilotContextFileNotSupportedDataType, CopilotFailedToModifyContextDataType, CopilotFailedToMatchContextDataType, CopilotFailedToMatchGlobalContextDataType, CopilotFailedToAddWorkspaceFileEmbeddingDataType, RuntimeConfigNotFoundDataType, InvalidRuntimeConfigTypeDataType, InvalidLicenseToActivateDataType, InvalidLicenseUpdateParamsDataType, UnsupportedClientVersionDataType, MentionUserDocAccessDeniedDataType] as const, }); diff --git a/packages/backend/server/src/models/common/copilot.ts b/packages/backend/server/src/models/common/copilot.ts index 2de68f40c7..73e5925e31 100644 --- a/packages/backend/server/src/models/common/copilot.ts +++ b/packages/backend/server/src/models/common/copilot.ts @@ -46,6 +46,7 @@ export const ContextFileSchema = z.object({ id: z.string(), chunkSize: z.number(), name: z.string(), + mimeType: z.string().optional(), status: ContextEmbedStatusSchema, error: z.string().nullable(), blobId: z.string(), @@ -100,6 +101,9 @@ export type ChunkSimilarity = { export type FileChunkSimilarity = ChunkSimilarity & { fileId: string; + blobId: string; + name: string; + mimeType: string; }; export type DocChunkSimilarity = ChunkSimilarity & { @@ -108,6 +112,7 @@ export type DocChunkSimilarity = ChunkSimilarity & { export const CopilotWorkspaceFileSchema = z.object({ fileName: z.string(), + blobId: z.string(), mimeType: z.string(), size: z.number(), }); diff --git a/packages/backend/server/src/models/copilot-context.ts b/packages/backend/server/src/models/copilot-context.ts index ab34530666..a241e0a788 100644 --- a/packages/backend/server/src/models/copilot-context.ts +++ b/packages/backend/server/src/models/copilot-context.ts @@ -1,6 +1,7 @@ import { randomUUID } from 'node:crypto'; import { Injectable } from '@nestjs/common'; +import { Transactional } from '@nestjs-cls/transactional'; import { Prisma } from '@prisma/client'; import { CopilotSessionNotFound } from '../base'; @@ -179,9 +180,9 @@ export class CopilotContextModel extends BaseModel { contextId: string, topK: number, threshold: number - ): Promise { + ): Promise[]> { const similarityChunks = await this.db.$queryRaw< - Array + Array> >` SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance" FROM "ai_context_embeddings" @@ -217,6 +218,7 @@ export class CopilotContextModel extends BaseModel { }); } + @Transactional() async matchWorkspaceEmbedding( embedding: number[], workspaceId: string, @@ -232,6 +234,18 @@ export class CopilotContextModel extends BaseModel { ORDER BY "distance" ASC LIMIT ${topK}; `; - return similarityChunks.filter(c => Number(c.distance) <= threshold); + + const matchedChunks = similarityChunks.filter( + c => Number(c.distance) <= threshold + ); + const matchedDocIds = Array.from(new Set(matchedChunks.map(c => c.docId))); + if (!matchDocIds?.length && matchedDocIds.length) { + const ignoredDocs = await this.models.copilotWorkspace.checkIgnoredDocs( + workspaceId, + matchedDocIds + ); + return matchedChunks.filter(c => !ignoredDocs.includes(c.docId)); + } + return matchedChunks; } } diff --git a/packages/backend/server/src/models/copilot-workspace.ts b/packages/backend/server/src/models/copilot-workspace.ts index 498e72eb5d..5d0c1d7c2c 100644 --- a/packages/backend/server/src/models/copilot-workspace.ts +++ b/packages/backend/server/src/models/copilot-workspace.ts @@ -16,6 +16,25 @@ import type { @Injectable() export class CopilotWorkspaceConfigModel extends BaseModel { + @Transactional() + private async listIgnoredDocIds( + workspaceId: string, + options?: PaginationInput + ) { + return await this.db.aiWorkspaceIgnoredDocs.findMany({ + where: { + workspaceId, + }, + select: { + docId: true, + createdAt: true, + }, + orderBy: { createdAt: 'desc' }, + skip: options?.offset, + take: options?.first, + }); + } + @Transactional() async updateIgnoredDocs( workspaceId: string, @@ -23,7 +42,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel { remove: string[] = [] ) { const removed = new Set(remove); - const ignored = await this.listIgnoredDocs(workspaceId).then( + const ignored = await this.listIgnoredDocIds(workspaceId).then( r => new Set(r.map(r => r.docId).filter(id => !removed.has(id))) ); const added = add.filter(id => !ignored.has(id)); @@ -51,25 +70,11 @@ export class CopilotWorkspaceConfigModel extends BaseModel { return added.length + ignored.size; } - @Transactional() async listIgnoredDocs( workspaceId: string, - options?: { - includeRead?: boolean; - } & PaginationInput + options?: PaginationInput ): Promise { - const row = await this.db.aiWorkspaceIgnoredDocs.findMany({ - where: { - workspaceId, - }, - select: { - docId: true, - createdAt: true, - }, - orderBy: { createdAt: 'desc' }, - skip: options?.offset, - take: options?.first, - }); + const row = await this.listIgnoredDocIds(workspaceId, options); const ids = row.map(r => ({ workspaceId, docId: r.docId })); const docs = await this.models.doc.findMetas(ids); const docsMap = new Map( @@ -106,7 +111,7 @@ export class CopilotWorkspaceConfigModel extends BaseModel { @Transactional() async checkIgnoredDocs(workspaceId: string, docIds: string[]) { - const ignored = await this.listIgnoredDocs(workspaceId).then( + const ignored = await this.listIgnoredDocIds(workspaceId).then( r => new Set(r.map(r => r.docId)) ); @@ -214,9 +219,19 @@ export class CopilotWorkspaceConfigModel extends BaseModel { const similarityChunks = await this.db.$queryRaw< Array >` - SELECT "file_id" as "fileId", "chunk", "content", "embedding" <=> ${embedding}::vector as "distance" - FROM "ai_workspace_file_embeddings" - WHERE workspace_id = ${workspaceId} + SELECT + e."file_id" as "fileId", + f."file_name" as "name", + f."blob_id" as "blobId", + f."mime_type" as "mimeType", + e."chunk", + e."content", + e."embedding" <=> ${embedding}::vector as "distance" + FROM "ai_workspace_file_embeddings" e + JOIN "ai_workspace_files" f + ON e."workspace_id" = f."workspace_id" + AND e."file_id" = f."file_id" + WHERE e.workspace_id = ${workspaceId} ORDER BY "distance" ASC LIMIT ${topK}; `; diff --git a/packages/backend/server/src/plugins/copilot/context/resolver.ts b/packages/backend/server/src/plugins/copilot/context/resolver.ts index 52b8f29be1..ac6a89bd1b 100644 --- a/packages/backend/server/src/plugins/copilot/context/resolver.ts +++ b/packages/backend/server/src/plugins/copilot/context/resolver.ts @@ -23,6 +23,7 @@ import { CallMetric, CopilotEmbeddingUnavailable, CopilotFailedToMatchContext, + CopilotFailedToMatchGlobalContext, CopilotFailedToModifyContext, CopilotSessionNotFound, EventBus, @@ -117,8 +118,8 @@ class RemoveContextFileInput { @ObjectType('CopilotContext') export class CopilotContextType { - @Field(() => ID) - id!: string; + @Field(() => ID, { nullable: true }) + id!: string | undefined; @Field(() => String) workspaceId!: string; @@ -169,6 +170,9 @@ class CopilotContextFile implements ContextFile { @Field(() => String) name!: string; + @Field(() => String) + mimeType!: string; + @Field(() => SafeIntResolver) chunkSize!: number; @@ -190,6 +194,15 @@ class ContextMatchedFileChunk implements FileChunkSimilarity { @Field(() => String) fileId!: string; + @Field(() => String) + blobId!: string; + + @Field(() => String) + name!: string; + + @Field(() => String) + mimeType!: string; + @Field(() => SafeIntResolver) chunk!: number; @@ -283,6 +296,15 @@ export class CopilotContextRootResolver { } } + if (copilot.workspaceId) { + return [ + { + id: undefined, + workspaceId: copilot.workspaceId, + }, + ]; + } + return []; } @@ -387,6 +409,9 @@ export class CopilotContextResolver { async collections( @Parent() context: CopilotContextType ): Promise { + if (!context.id) { + return []; + } const session = await this.context.get(context.id); const collections = session.collections; await this.models.copilotContext.mergeDocStatus( @@ -404,6 +429,9 @@ export class CopilotContextResolver { async tags( @Parent() context: CopilotContextType ): Promise { + if (!context.id) { + return []; + } const session = await this.context.get(context.id); const tags = session.tags; await this.models.copilotContext.mergeDocStatus( @@ -419,6 +447,9 @@ export class CopilotContextResolver { }) @CallMetric('ai', 'context_file_list') async docs(@Parent() context: CopilotContextType): Promise { + if (!context.id) { + return []; + } const session = await this.context.get(context.id); const docs = session.docs; await this.models.copilotContext.mergeDocStatus(session.workspaceId, docs); @@ -433,6 +464,9 @@ export class CopilotContextResolver { async files( @Parent() context: CopilotContextType ): Promise { + if (!context.id) { + return []; + } const session = await this.context.get(context.id); return session.files; } @@ -593,7 +627,11 @@ export class CopilotContextResolver { const session = await this.context.get(options.contextId); try { - const file = await session.addFile(options.blobId, content.filename); + const file = await session.addFile( + options.blobId, + content.filename, + content.mimetype + ); const buffer = await readStream(content.createReadStream()); await this.storage.put( @@ -664,6 +702,8 @@ export class CopilotContextResolver { @Args('content') content: string, @Args('limit', { type: () => SafeIntResolver, nullable: true }) limit?: number, + @Args('scopedThreshold', { type: () => Float, nullable: true }) + scopedThreshold?: number, @Args('threshold', { type: () => Float, nullable: true }) threshold?: number ): Promise { @@ -671,22 +711,46 @@ export class CopilotContextResolver { return []; } - const session = await this.context.get(context.id); - try { - return await session.matchFileChunks( + if (!context.id) { + return await this.context.matchWorkspaceFiles( + context.workspaceId, + content, + limit, + this.getSignal(ctx.req), + threshold + ); + } + + const session = await this.context.get(context.id); + return await session.matchFiles( content, limit, this.getSignal(ctx.req), + scopedThreshold, threshold ); } catch (e: any) { - throw new CopilotFailedToMatchContext({ - contextId: context.id, - // don't record the large content - content: content.slice(0, 512), - message: e.message, - }); + // passthrough user friendly error + if (e instanceof UserFriendlyError) { + throw e; + } + + if (context.id) { + throw new CopilotFailedToMatchContext({ + contextId: context.id, + // don't record the large content + content: content.slice(0, 512), + message: e.message, + }); + } else { + throw new CopilotFailedToMatchGlobalContext({ + workspaceId: context.workspaceId, + // don't record the large content + content: content.slice(0, 512), + message: e.message, + }); + } } } @@ -711,20 +775,38 @@ export class CopilotContextResolver { } try { - const session = await this.context.get(context.id); await this.ac .user(user.id) - .workspace(session.workspaceId) + .workspace(context.workspaceId) .allowLocal() .assert('Workspace.Copilot'); const allowEmbedding = await this.models.workspace.allowEmbedding( - session.workspaceId + context.workspaceId ); if (!allowEmbedding) { return []; } - const chunks = await session.matchWorkspaceChunks( + if (!context.id) { + return await this.context.matchWorkspaceDocs( + context.workspaceId, + content, + limit, + this.getSignal(ctx.req), + threshold + ); + } + + const session = await this.context.get(context.id); + if (session.workspaceId !== context.workspaceId) { + throw new CopilotFailedToMatchContext({ + contextId: context.id, + // don't record the large content + content: content.slice(0, 512), + message: 'context not in the same workspace', + }); + } + const chunks = await session.matchWorkspaceDocs( content, limit, this.getSignal(ctx.req), @@ -748,12 +830,22 @@ export class CopilotContextResolver { if (e instanceof UserFriendlyError) { throw e; } - throw new CopilotFailedToMatchContext({ - contextId: context.id, - // don't record the large content - content: content.slice(0, 512), - message: e.message, - }); + + if (context.id) { + throw new CopilotFailedToMatchContext({ + contextId: context.id, + // don't record the large content + content: content.slice(0, 512), + message: e.message, + }); + } else { + throw new CopilotFailedToMatchGlobalContext({ + workspaceId: context.workspaceId, + // don't record the large content + content: content.slice(0, 512), + message: e.message, + }); + } } } } diff --git a/packages/backend/server/src/plugins/copilot/context/service.ts b/packages/backend/server/src/plugins/copilot/context/service.ts index f07c3ef1ca..2b450ef9a4 100644 --- a/packages/backend/server/src/plugins/copilot/context/service.ts +++ b/packages/backend/server/src/plugins/copilot/context/service.ts @@ -148,6 +148,48 @@ export class CopilotContextService implements OnApplicationBootstrap { return null; } + async matchWorkspaceFiles( + workspaceId: string, + content: string, + topK: number = 5, + signal?: AbortSignal, + threshold: number = 0.5 + ) { + if (!this.embeddingClient) return []; + const embedding = await this.embeddingClient.getEmbedding(content, signal); + if (!embedding) return []; + + const chunks = await this.models.copilotWorkspace.matchFileEmbedding( + workspaceId, + embedding, + topK * 2, + threshold + ); + + return this.embeddingClient.reRank(content, chunks, topK, signal); + } + + async matchWorkspaceDocs( + workspaceId: string, + content: string, + topK: number = 5, + signal?: AbortSignal, + threshold: number = 0.5 + ) { + if (!this.embeddingClient) return []; + const embedding = await this.embeddingClient.getEmbedding(content, signal); + if (!embedding) return []; + + const workspace = await this.models.copilotContext.matchWorkspaceEmbedding( + embedding, + workspaceId, + topK * 2, + threshold + ); + + return this.embeddingClient.reRank(content, workspace, topK); + } + @OnEvent('workspace.doc.embed.failed') async onDocEmbedFailed({ contextId, diff --git a/packages/backend/server/src/plugins/copilot/context/session.ts b/packages/backend/server/src/plugins/copilot/context/session.ts index 05dd9ff6af..a84ac48726 100644 --- a/packages/backend/server/src/plugins/copilot/context/session.ts +++ b/packages/backend/server/src/plugins/copilot/context/session.ts @@ -52,12 +52,16 @@ export class ContextSession implements AsyncDisposable { } get files() { - return this.config.files.map(f => ({ ...f })); + return this.config.files.map(f => this.fulfillFile(f)); } get docIds() { return Array.from( - new Set([this.config.docs, this.config.categories].flat().map(d => d.id)) + new Set( + [this.config.docs, this.config.categories.flatMap(c => c.docs)] + .flat() + .map(d => d.id) + ) ); } @@ -136,14 +140,25 @@ export class ContextSession implements AsyncDisposable { return true; } - async addFile(blobId: string, name: string): Promise { + private fulfillFile(file: ContextFile): Required { + return { + ...file, + mimeType: file.mimeType || 'application/octet-stream', + }; + } + + async addFile( + blobId: string, + name: string, + mimeType: string + ): Promise> { let fileId = nanoid(); const existsBlob = this.config.files.find(f => f.blobId === blobId); if (existsBlob) { // use exists file id if the blob exists // we assume that the file content pointed to by the same blobId is consistent. if (existsBlob.status === ContextEmbedStatus.finished) { - return existsBlob; + return this.fulfillFile(existsBlob); } fileId = existsBlob.id; } else { @@ -152,11 +167,12 @@ export class ContextSession implements AsyncDisposable { blobId, chunkSize: 0, name, + mimeType, error: null, createdAt: Date.now(), })); } - return this.getFile(fileId) as ContextFile; + return this.fulfillFile(this.getFile(fileId) as ContextFile); } getFile(fileId: string): ContextFile | undefined { @@ -181,15 +197,14 @@ export class ContextSession implements AsyncDisposable { * @param threshold relevance threshold for the similarity score, higher threshold means more similar chunks, default 0.7, good enough based on prior experiments * @returns list of similar chunks */ - async matchFileChunks( + async matchFiles( content: string, topK: number = 5, signal?: AbortSignal, - threshold: number = 0.85 + scopedThreshold: number = 0.85, + threshold: number = 0.5 ): Promise { - const embedding = await this.client - .getEmbeddings([content], signal) - .then(r => r?.[0]?.embedding); + const embedding = await this.client.getEmbedding(content, signal); if (!embedding) return []; const [context, workspace] = await Promise.all([ @@ -197,7 +212,7 @@ export class ContextSession implements AsyncDisposable { embedding, this.id, topK * 2, - threshold + scopedThreshold ), this.models.copilotWorkspace.matchFileEmbedding( this.workspaceId, @@ -206,10 +221,21 @@ export class ContextSession implements AsyncDisposable { threshold ), ]); + const files = new Map(this.files.map(f => [f.id, f])); return this.client.reRank( content, - [...context, ...workspace], + [ + ...context + .filter(f => files.has(f.fileId)) + .map(c => { + const { blobId, name, mimeType } = files.get( + c.fileId + ) as Required; + return { ...c, blobId, name, mimeType }; + }), + ...workspace, + ], topK, signal ); @@ -223,16 +249,14 @@ export class ContextSession implements AsyncDisposable { * @param threshold relevance threshold for the similarity score, higher threshold means more similar chunks, default 0.7, good enough based on prior experiments * @returns list of similar chunks */ - async matchWorkspaceChunks( + async matchWorkspaceDocs( content: string, topK: number = 5, signal?: AbortSignal, - scopedThreshold: number = 0.5, - threshold: number = 0.85 + scopedThreshold: number = 0.85, + threshold: number = 0.5 ) { - const embedding = await this.client - .getEmbeddings([content], signal) - .then(r => r?.[0]?.embedding); + const embedding = await this.client.getEmbedding(content, signal); if (!embedding) return []; const docIds = this.docIds; diff --git a/packages/backend/server/src/plugins/copilot/context/types.ts b/packages/backend/server/src/plugins/copilot/context/types.ts index e43ff1ab58..9387325025 100644 --- a/packages/backend/server/src/plugins/copilot/context/types.ts +++ b/packages/backend/server/src/plugins/copilot/context/types.ts @@ -128,6 +128,11 @@ export abstract class EmbeddingClient { .slice(0, topK); } + async getEmbedding(query: string, signal?: AbortSignal) { + const embedding = await this.getEmbeddings([query], signal); + return embedding?.[0]?.embedding; + } + abstract getEmbeddings( input: string[], signal?: AbortSignal diff --git a/packages/backend/server/src/plugins/copilot/workspace/service.ts b/packages/backend/server/src/plugins/copilot/workspace/service.ts index 85ed5d1a0b..34307a8bd6 100644 --- a/packages/backend/server/src/plugins/copilot/workspace/service.ts +++ b/packages/backend/server/src/plugins/copilot/workspace/service.ts @@ -60,6 +60,7 @@ export class CopilotWorkspaceService implements OnApplicationBootstrap { await this.storage.put(userId, workspaceId, blobId, buffer); const file = await this.models.copilotWorkspace.addFile(workspaceId, { fileName, + blobId, mimeType: content.mimetype, size: buffer.length, }); diff --git a/packages/backend/server/src/plugins/copilot/workspace/types.ts b/packages/backend/server/src/plugins/copilot/workspace/types.ts index 8995b1bbf3..fb719a0388 100644 --- a/packages/backend/server/src/plugins/copilot/workspace/types.ts +++ b/packages/backend/server/src/plugins/copilot/workspace/types.ts @@ -55,6 +55,9 @@ export class CopilotWorkspaceFileType implements CopilotWorkspaceFile { @Field(() => String) fileId!: string; + @Field(() => String) + blobId!: string; + @Field(() => String) fileName!: string; diff --git a/packages/backend/server/src/schema.gql b/packages/backend/server/src/schema.gql index 288cf86880..db14f1baad 100644 --- a/packages/backend/server/src/schema.gql +++ b/packages/backend/server/src/schema.gql @@ -69,10 +69,13 @@ type ContextMatchedDocChunk { } type ContextMatchedFileChunk { + blobId: String! chunk: SafeInt! content: String! distance: Float fileId: String! + mimeType: String! + name: String! } type ContextWorkspaceEmbeddingStatus { @@ -107,10 +110,10 @@ type CopilotContext { """list files in context""" files: [CopilotContextFile!]! - id: ID! + id: ID """match file in context""" - matchFiles(content: String!, limit: SafeInt, threshold: Float): [ContextMatchedFileChunk!]! + matchFiles(content: String!, limit: SafeInt, scopedThreshold: Float, threshold: Float): [ContextMatchedFileChunk!]! """match workspace docs""" matchWorkspaceDocs(content: String!, limit: SafeInt, scopedThreshold: Float, threshold: Float): [ContextMatchedDocChunk!]! @@ -140,6 +143,7 @@ type CopilotContextFile { createdAt: SafeInt! error: String id: ID! + mimeType: String! name: String! status: ContextEmbedStatus! } @@ -169,6 +173,12 @@ type CopilotFailedToMatchContextDataType { message: String! } +type CopilotFailedToMatchGlobalContextDataType { + content: String! + message: String! + workspaceId: String! +} + type CopilotFailedToModifyContextDataType { contextId: String! message: String! @@ -280,6 +290,7 @@ type CopilotWorkspaceConfig { } type CopilotWorkspaceFile { + blobId: String! createdAt: DateTime! fileId: String! fileName: String! @@ -464,7 +475,7 @@ type EditorType { name: String! } -union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType +union ErrorDataUnion = AlreadyInSpaceDataType | BlobNotFoundDataType | CopilotContextFileNotSupportedDataType | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType | CopilotPromptNotFoundDataType | CopilotProviderSideErrorDataType | DocActionDeniedDataType | DocHistoryNotFoundDataType | DocNotFoundDataType | DocUpdateBlockedDataType | ExpectToGrantDocUserRolesDataType | ExpectToRevokeDocUserRolesDataType | ExpectToUpdateDocUserRoleDataType | GraphqlBadRequestDataType | HttpRequestErrorDataType | InvalidEmailDataType | InvalidHistoryTimestampDataType | InvalidLicenseToActivateDataType | InvalidLicenseUpdateParamsDataType | InvalidOauthCallbackCodeDataType | InvalidPasswordLengthDataType | InvalidRuntimeConfigTypeDataType | MemberNotFoundInSpaceDataType | MentionUserDocAccessDeniedDataType | MissingOauthQueryParameterDataType | NoMoreSeatDataType | NotInSpaceDataType | QueryTooLongDataType | RuntimeConfigNotFoundDataType | SameSubscriptionRecurringDataType | SpaceAccessDeniedDataType | SpaceNotFoundDataType | SpaceOwnerNotFoundDataType | SpaceShouldHaveOnlyOneOwnerDataType | SubscriptionAlreadyExistsDataType | SubscriptionNotExistsDataType | SubscriptionPlanNotFoundDataType | UnknownOauthProviderDataType | UnsupportedClientVersionDataType | UnsupportedSubscriptionPlanDataType | ValidationErrorDataType | VersionRejectedDataType | WorkspacePermissionNotFoundDataType | WrongSignInCredentialsDataType enum ErrorNames { ACCESS_DENIED @@ -491,6 +502,7 @@ enum ErrorNames { COPILOT_FAILED_TO_CREATE_MESSAGE COPILOT_FAILED_TO_GENERATE_TEXT COPILOT_FAILED_TO_MATCH_CONTEXT + COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT COPILOT_FAILED_TO_MODIFY_CONTEXT COPILOT_INVALID_CONTEXT COPILOT_MESSAGE_NOT_FOUND diff --git a/packages/common/graphql/src/graphql/copilot-context-file-add.gql b/packages/common/graphql/src/graphql/copilot-context-file-add.gql index 66044663d6..d8e4940764 100644 --- a/packages/common/graphql/src/graphql/copilot-context-file-add.gql +++ b/packages/common/graphql/src/graphql/copilot-context-file-add.gql @@ -3,6 +3,7 @@ mutation addContextFile($content: Upload!, $options: AddContextFileInput!) { id createdAt name + mimeType chunkSize error status diff --git a/packages/common/graphql/src/graphql/copilot-context-list-object.gql b/packages/common/graphql/src/graphql/copilot-context-list-object.gql index 3f102ce0e8..1cedfbeb2c 100644 --- a/packages/common/graphql/src/graphql/copilot-context-list-object.gql +++ b/packages/common/graphql/src/graphql/copilot-context-list-object.gql @@ -15,6 +15,7 @@ query listContextObject( files { id name + mimeType blobId chunkSize error diff --git a/packages/common/graphql/src/graphql/copilot-context-match-all.gql b/packages/common/graphql/src/graphql/copilot-context-match-all.gql index 00d386abe7..ba39cd04f0 100644 --- a/packages/common/graphql/src/graphql/copilot-context-match-all.gql +++ b/packages/common/graphql/src/graphql/copilot-context-match-all.gql @@ -1,14 +1,17 @@ -query matchContext($contextId: String!, $content: String!, $limit: SafeInt, $threshold: Float) { +query matchContext($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { - matchFiles(content: $content, limit: $limit, threshold: $threshold) { + matchFiles(content: $content, limit: $limit, scopedThreshold: $scopedThreshold, threshold: $threshold) { fileId + blobId + name + mimeType chunk content distance } - matchWorkspaceDocs(content: $content, limit: $limit, threshold: $threshold) { + matchWorkspaceDocs(content: $content, limit: $limit, scopedThreshold: $scopedThreshold, threshold: $threshold) { docId chunk content diff --git a/packages/common/graphql/src/graphql/copilot-context-match-docs.gql b/packages/common/graphql/src/graphql/copilot-context-match-docs.gql index 83bef6bc6b..cd3b50b4eb 100644 --- a/packages/common/graphql/src/graphql/copilot-context-match-docs.gql +++ b/packages/common/graphql/src/graphql/copilot-context-match-docs.gql @@ -1,6 +1,6 @@ -query matchWorkspaceDocs($contextId: String!, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { +query matchWorkspaceDocs($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { matchWorkspaceDocs(content: $content, limit: $limit, scopedThreshold: $scopedThreshold, threshold: $threshold) { docId diff --git a/packages/common/graphql/src/graphql/copilot-context-match-files.gql b/packages/common/graphql/src/graphql/copilot-context-match-files.gql index a6e6fb4b07..b2e8b76175 100644 --- a/packages/common/graphql/src/graphql/copilot-context-match-files.gql +++ b/packages/common/graphql/src/graphql/copilot-context-match-files.gql @@ -1,9 +1,10 @@ -query matchFiles($contextId: String!, $content: String!, $limit: SafeInt, $threshold: Float) { +query matchFiles($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { - matchFiles(content: $content, limit: $limit, threshold: $threshold) { + matchFiles(content: $content, limit: $limit, scopedThreshold: $scopedThreshold, threshold: $threshold) { fileId + blobId chunk content distance diff --git a/packages/common/graphql/src/graphql/copilot-workspace-file-add.gql b/packages/common/graphql/src/graphql/copilot-workspace-file-add.gql index d6cee2aceb..e736a9199d 100644 --- a/packages/common/graphql/src/graphql/copilot-workspace-file-add.gql +++ b/packages/common/graphql/src/graphql/copilot-workspace-file-add.gql @@ -2,6 +2,7 @@ mutation addWorkspaceEmbeddingFiles($workspaceId: String!, $blob: Upload!) { addWorkspaceEmbeddingFiles(workspaceId: $workspaceId, blob: $blob) { fileId fileName + blobId mimeType size createdAt diff --git a/packages/common/graphql/src/graphql/copilot-workspace-file-get.gql b/packages/common/graphql/src/graphql/copilot-workspace-file-get.gql index 89e72ac1f5..b058022abd 100644 --- a/packages/common/graphql/src/graphql/copilot-workspace-file-get.gql +++ b/packages/common/graphql/src/graphql/copilot-workspace-file-get.gql @@ -11,6 +11,7 @@ query getWorkspaceEmbeddingFiles($workspaceId: String!, $pagination: PaginationI node { fileId fileName + blobId mimeType size createdAt diff --git a/packages/common/graphql/src/graphql/index.ts b/packages/common/graphql/src/graphql/index.ts index ff7c06e7b7..1556b9472a 100644 --- a/packages/common/graphql/src/graphql/index.ts +++ b/packages/common/graphql/src/graphql/index.ts @@ -390,6 +390,7 @@ export const addContextFileMutation = { id createdAt name + mimeType chunkSize error status @@ -423,6 +424,7 @@ export const listContextObjectQuery = { files { id name + mimeType blobId chunkSize error @@ -473,17 +475,30 @@ export const listContextQuery = { export const matchContextQuery = { id: 'matchContextQuery' as const, op: 'matchContext', - query: `query matchContext($contextId: String!, $content: String!, $limit: SafeInt, $threshold: Float) { + query: `query matchContext($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { - matchFiles(content: $content, limit: $limit, threshold: $threshold) { + matchFiles( + content: $content + limit: $limit + scopedThreshold: $scopedThreshold + threshold: $threshold + ) { fileId + blobId + name + mimeType chunk content distance } - matchWorkspaceDocs(content: $content, limit: $limit, threshold: $threshold) { + matchWorkspaceDocs( + content: $content + limit: $limit + scopedThreshold: $scopedThreshold + threshold: $threshold + ) { docId chunk content @@ -498,9 +513,9 @@ export const matchContextQuery = { export const matchWorkspaceDocsQuery = { id: 'matchWorkspaceDocsQuery' as const, op: 'matchWorkspaceDocs', - query: `query matchWorkspaceDocs($contextId: String!, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { + query: `query matchWorkspaceDocs($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { matchWorkspaceDocs( content: $content @@ -522,12 +537,18 @@ export const matchWorkspaceDocsQuery = { export const matchFilesQuery = { id: 'matchFilesQuery' as const, op: 'matchFiles', - query: `query matchFiles($contextId: String!, $content: String!, $limit: SafeInt, $threshold: Float) { + query: `query matchFiles($contextId: String, $workspaceId: String, $content: String!, $limit: SafeInt, $scopedThreshold: Float, $threshold: Float) { currentUser { - copilot { + copilot(workspaceId: $workspaceId) { contexts(contextId: $contextId) { - matchFiles(content: $content, limit: $limit, threshold: $threshold) { + matchFiles( + content: $content + limit: $limit + scopedThreshold: $scopedThreshold + threshold: $threshold + ) { fileId + blobId chunk content distance @@ -750,6 +771,7 @@ export const addWorkspaceEmbeddingFilesMutation = { addWorkspaceEmbeddingFiles(workspaceId: $workspaceId, blob: $blob) { fileId fileName + blobId mimeType size createdAt @@ -774,6 +796,7 @@ export const getWorkspaceEmbeddingFilesQuery = { node { fileId fileName + blobId mimeType size createdAt diff --git a/packages/common/graphql/src/schema.ts b/packages/common/graphql/src/schema.ts index 97395c613c..c4026b01c7 100644 --- a/packages/common/graphql/src/schema.ts +++ b/packages/common/graphql/src/schema.ts @@ -109,10 +109,13 @@ export interface ContextMatchedDocChunk { export interface ContextMatchedFileChunk { __typename?: 'ContextMatchedFileChunk'; + blobId: Scalars['String']['output']; chunk: Scalars['SafeInt']['output']; content: Scalars['String']['output']; distance: Maybe; fileId: Scalars['String']['output']; + mimeType: Scalars['String']['output']; + name: Scalars['String']['output']; } export interface ContextWorkspaceEmbeddingStatus { @@ -172,7 +175,7 @@ export interface CopilotContext { docs: Array; /** list files in context */ files: Array; - id: Scalars['ID']['output']; + id: Maybe; /** match file in context */ matchFiles: Array; /** match workspace docs */ @@ -185,6 +188,7 @@ export interface CopilotContext { export interface CopilotContextMatchFilesArgs { content: Scalars['String']['input']; limit?: InputMaybe; + scopedThreshold?: InputMaybe; threshold?: InputMaybe; } @@ -218,6 +222,7 @@ export interface CopilotContextFile { createdAt: Scalars['SafeInt']['output']; error: Maybe; id: Scalars['ID']['output']; + mimeType: Scalars['String']['output']; name: Scalars['String']['output']; status: ContextEmbedStatus; } @@ -252,6 +257,13 @@ export interface CopilotFailedToMatchContextDataType { message: Scalars['String']['output']; } +export interface CopilotFailedToMatchGlobalContextDataType { + __typename?: 'CopilotFailedToMatchGlobalContextDataType'; + content: Scalars['String']['output']; + message: Scalars['String']['output']; + workspaceId: Scalars['String']['output']; +} + export interface CopilotFailedToModifyContextDataType { __typename?: 'CopilotFailedToModifyContextDataType'; contextId: Scalars['String']['output']; @@ -383,6 +395,7 @@ export interface CopilotWorkspaceConfigIgnoredDocsArgs { export interface CopilotWorkspaceFile { __typename?: 'CopilotWorkspaceFile'; + blobId: Scalars['String']['output']; createdAt: Scalars['DateTime']['output']; fileId: Scalars['String']['output']; fileName: Scalars['String']['output']; @@ -582,6 +595,7 @@ export type ErrorDataUnion = | CopilotDocNotFoundDataType | CopilotFailedToAddWorkspaceFileEmbeddingDataType | CopilotFailedToMatchContextDataType + | CopilotFailedToMatchGlobalContextDataType | CopilotFailedToModifyContextDataType | CopilotInvalidContextDataType | CopilotMessageNotFoundDataType @@ -651,6 +665,7 @@ export enum ErrorNames { COPILOT_FAILED_TO_CREATE_MESSAGE = 'COPILOT_FAILED_TO_CREATE_MESSAGE', COPILOT_FAILED_TO_GENERATE_TEXT = 'COPILOT_FAILED_TO_GENERATE_TEXT', COPILOT_FAILED_TO_MATCH_CONTEXT = 'COPILOT_FAILED_TO_MATCH_CONTEXT', + COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT = 'COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT', COPILOT_FAILED_TO_MODIFY_CONTEXT = 'COPILOT_FAILED_TO_MODIFY_CONTEXT', COPILOT_INVALID_CONTEXT = 'COPILOT_INVALID_CONTEXT', COPILOT_MESSAGE_NOT_FOUND = 'COPILOT_MESSAGE_NOT_FOUND', @@ -2876,6 +2891,7 @@ export type AddContextFileMutation = { id: string; createdAt: number; name: string; + mimeType: string; chunkSize: number; error: string | null; status: ContextEmbedStatus; @@ -2917,6 +2933,7 @@ export type ListContextObjectQuery = { __typename?: 'CopilotContextFile'; id: string; name: string; + mimeType: string; blobId: string; chunkSize: number; error: string | null; @@ -2965,7 +2982,7 @@ export type ListContextQuery = { __typename?: 'Copilot'; contexts: Array<{ __typename?: 'CopilotContext'; - id: string; + id: string | null; workspaceId: string; }>; }; @@ -2973,9 +2990,11 @@ export type ListContextQuery = { }; export type MatchContextQueryVariables = Exact<{ - contextId: Scalars['String']['input']; + contextId?: InputMaybe; + workspaceId?: InputMaybe; content: Scalars['String']['input']; limit?: InputMaybe; + scopedThreshold?: InputMaybe; threshold?: InputMaybe; }>; @@ -2990,6 +3009,9 @@ export type MatchContextQuery = { matchFiles: Array<{ __typename?: 'ContextMatchedFileChunk'; fileId: string; + blobId: string; + name: string; + mimeType: string; chunk: number; content: string; distance: number | null; @@ -3007,7 +3029,8 @@ export type MatchContextQuery = { }; export type MatchWorkspaceDocsQueryVariables = Exact<{ - contextId: Scalars['String']['input']; + contextId?: InputMaybe; + workspaceId?: InputMaybe; content: Scalars['String']['input']; limit?: InputMaybe; scopedThreshold?: InputMaybe; @@ -3035,9 +3058,11 @@ export type MatchWorkspaceDocsQuery = { }; export type MatchFilesQueryVariables = Exact<{ - contextId: Scalars['String']['input']; + contextId?: InputMaybe; + workspaceId?: InputMaybe; content: Scalars['String']['input']; limit?: InputMaybe; + scopedThreshold?: InputMaybe; threshold?: InputMaybe; }>; @@ -3052,6 +3077,7 @@ export type MatchFilesQuery = { matchFiles: Array<{ __typename?: 'ContextMatchedFileChunk'; fileId: string; + blobId: string; chunk: number; content: string; distance: number | null; @@ -3321,6 +3347,7 @@ export type AddWorkspaceEmbeddingFilesMutation = { __typename?: 'CopilotWorkspaceFile'; fileId: string; fileName: string; + blobId: string; mimeType: string; size: number; createdAt: string; @@ -3352,6 +3379,7 @@ export type GetWorkspaceEmbeddingFilesQuery = { __typename?: 'CopilotWorkspaceFile'; fileId: string; fileName: string; + blobId: string; mimeType: string; size: number; createdAt: string; diff --git a/packages/frontend/core/src/blocksuite/ai/actions/types.ts b/packages/frontend/core/src/blocksuite/ai/actions/types.ts index cdf627567b..38d54c93d5 100644 --- a/packages/frontend/core/src/blocksuite/ai/actions/types.ts +++ b/packages/frontend/core/src/blocksuite/ai/actions/types.ts @@ -329,9 +329,12 @@ declare global { abortSignal: AbortSignal ) => Promise; matchContext: ( - contextId: string, content: string, - limit?: number + contextId?: string, + workspaceId?: string, + limit?: number, + scopedThreshold?: number, + threshold?: number ) => Promise<{ files?: ContextMatchedFileChunk[]; docs?: ContextMatchedDocChunk[]; diff --git a/packages/frontend/core/src/blocksuite/ai/components/ai-chat-input/ai-chat-input.ts b/packages/frontend/core/src/blocksuite/ai/components/ai-chat-input/ai-chat-input.ts index d4a3a8e0bb..b439897e08 100644 --- a/packages/frontend/core/src/blocksuite/ai/components/ai-chat-input/ai-chat-input.ts +++ b/packages/frontend/core/src/blocksuite/ai/components/ai-chat-input/ai-chat-input.ts @@ -661,7 +661,7 @@ export class AIChatInput extends SignalWatcher(WithDisposable(LitElement)) { >(); const { files: matchedFiles = [], docs: matchedDocs = [] } = - (await AIProvider.context?.matchContext(contextId, userInput)) ?? {}; + (await AIProvider.context?.matchContext(userInput, contextId)) ?? {}; matchedDocs.forEach(doc => { docContexts.set(doc.docId, { diff --git a/packages/frontend/core/src/blocksuite/ai/provider/copilot-client.ts b/packages/frontend/core/src/blocksuite/ai/provider/copilot-client.ts index ee3c910bc5..05403d2c0e 100644 --- a/packages/frontend/core/src/blocksuite/ai/provider/copilot-client.ts +++ b/packages/frontend/core/src/blocksuite/ai/provider/copilot-client.ts @@ -241,7 +241,7 @@ export class CopilotClient { sessionId, }, }); - return res.currentUser?.copilot?.contexts?.[0]?.id; + return res.currentUser?.copilot?.contexts?.[0]?.id || undefined; } async addContextDoc(options: OptionsField) { @@ -333,13 +333,23 @@ export class CopilotClient { return res.currentUser?.copilot?.contexts?.[0]; } - async matchContext(contextId: string, content: string, limit?: number) { + async matchContext( + content: string, + contextId?: string, + workspaceId?: string, + limit?: number, + scopedThreshold?: number, + threshold?: number + ) { const res = await this.gql({ query: matchContextQuery, variables: { - contextId, content, + contextId, + workspaceId, limit, + scopedThreshold, + threshold, }, }); const { matchFiles: files, matchWorkspaceDocs: docs } = diff --git a/packages/frontend/core/src/blocksuite/ai/provider/setup-provider.tsx b/packages/frontend/core/src/blocksuite/ai/provider/setup-provider.tsx index 5cc70a407e..dd02fddbf6 100644 --- a/packages/frontend/core/src/blocksuite/ai/provider/setup-provider.tsx +++ b/packages/frontend/core/src/blocksuite/ai/provider/setup-provider.tsx @@ -700,11 +700,21 @@ Could you make a new website based on these notes and send back just the html fi } }, matchContext: async ( - contextId: string, content: string, - limit?: number + contextId?: string, + workspaceId?: string, + limit?: number, + scopedThreshold?: number, + threshold?: number ) => { - return client.matchContext(contextId, content, limit); + return client.matchContext( + content, + contextId, + workspaceId, + limit, + scopedThreshold, + threshold + ); }, }); diff --git a/packages/frontend/i18n/src/i18n.gen.ts b/packages/frontend/i18n/src/i18n.gen.ts index 3f5b743ed3..8ba7865918 100644 --- a/packages/frontend/i18n/src/i18n.gen.ts +++ b/packages/frontend/i18n/src/i18n.gen.ts @@ -8412,6 +8412,13 @@ export function useAFFiNEI18N(): { contextId: string; message: string; }>): string; + /** + * `Failed to match context in workspace {{workspaceId}} with "%7B%7Bcontent%7D%7D": {{message}}` + */ + ["error.COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT"](options: Readonly<{ + workspaceId: string; + message: string; + }>): string; /** * `Embedding feature is disabled, please contact the administrator to enable it in the workspace settings.` */ diff --git a/packages/frontend/i18n/src/resources/en.json b/packages/frontend/i18n/src/resources/en.json index cfbf4422bb..f365eb4eb6 100644 --- a/packages/frontend/i18n/src/resources/en.json +++ b/packages/frontend/i18n/src/resources/en.json @@ -2081,6 +2081,7 @@ "error.COPILOT_CONTEXT_FILE_NOT_SUPPORTED": "File {{fileName}} is not supported to use as context: {{message}}", "error.COPILOT_FAILED_TO_MODIFY_CONTEXT": "Failed to modify context {{contextId}}: {{message}}", "error.COPILOT_FAILED_TO_MATCH_CONTEXT": "Failed to match context {{contextId}} with \"%7B%7Bcontent%7D%7D\": {{message}}", + "error.COPILOT_FAILED_TO_MATCH_GLOBAL_CONTEXT": "Failed to match context in workspace {{workspaceId}} with \"%7B%7Bcontent%7D%7D\": {{message}}", "error.COPILOT_EMBEDDING_DISABLED": "Embedding feature is disabled, please contact the administrator to enable it in the workspace settings.", "error.COPILOT_EMBEDDING_UNAVAILABLE": "Embedding feature not available, you may need to install pgvector extension to your database", "error.COPILOT_TRANSCRIPTION_JOB_EXISTS": "Transcription job already exists",