feat(server): basic mcp server (#13298)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
  * Introduced a new endpoint for MCP (Model Context Protocol) server
    interaction under `/api/workspaces/:workspaceId/mcp`, enabling advanced
    document reading and search capabilities within workspaces.
  * Added support for semantic and keyword search tools, as well as
    document reading through the MCP server, with user access control and
    input validation.

* **Improvements**
  * Enhanced metadata handling in semantic search results for improved
    clarity.
  * Streamlined internal imports and refactored utility functions for
    better maintainability.

* **Chores**
  * Added a new SDK dependency to the backend server package.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Yii
2025-07-31 14:12:50 +08:00
committed by GitHub
parent 49e8f339d4
commit 8d889fc3c7
10 changed files with 294 additions and 47 deletions

View File

@@ -52,8 +52,7 @@ import { CopilotEmbeddingJob } from '../embedding';
import { COPILOT_LOCKER, CopilotType } from '../resolver';
import { ChatSessionService } from '../session';
import { CopilotStorage } from '../storage';
import { MAX_EMBEDDABLE_SIZE } from '../types';
import { getSignal, readStream } from '../utils';
import { getSignal, MAX_EMBEDDABLE_SIZE, readStream } from '../utils';
import { CopilotContextService } from './service';
@InputType()

View File

@@ -17,6 +17,8 @@ import {
import { CopilotController } from './controller';
import { CopilotCronJobs } from './cron';
import { CopilotEmbeddingJob } from './embedding';
import { WorkspaceMcpController } from './mcp/controller';
import { WorkspaceMcpProvider } from './mcp/provider';
import { ChatMessageCache } from './message';
import { PromptService } from './prompt';
import { CopilotProviderFactory, CopilotProviders } from './providers';
@@ -78,7 +80,9 @@ import {
UserCopilotResolver,
PromptsManagementResolver,
CopilotContextRootResolver,
// mcp
WorkspaceMcpProvider,
],
controllers: [CopilotController],
controllers: [CopilotController, WorkspaceMcpController],
})
export class CopilotModule {}

View File

@@ -0,0 +1,69 @@
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import {
Controller,
Delete,
Get,
HttpCode,
HttpStatus,
Logger,
Param,
Post,
Req,
Res,
} from '@nestjs/common';
import type { Request, Response } from 'express';
import { CurrentUser } from '../../../core/auth';
import { WorkspaceMcpProvider } from './provider';
/**
 * HTTP entry point for the workspace-scoped MCP (Model Context Protocol)
 * server, mounted at `/api/workspaces/:workspaceId/mcp`.
 *
 * Every POST builds a fresh server/transport pair and tears it down when the
 * response closes. The transport is created with
 * `sessionIdGenerator: undefined`, so no session id is issued; GET and DELETE
 * are therefore answered with 405.
 */
@Controller('/api/workspaces/:workspaceId/mcp')
export class WorkspaceMcpController {
  private readonly logger = new Logger(WorkspaceMcpController.name);

  constructor(private readonly provider: WorkspaceMcpProvider) {}

  /**
   * Rejects GET and DELETE with HTTP 405 and a JSON-RPC error body.
   *
   * @returns a JSON-RPC 2.0 error object with a `null` id.
   */
  @Get('/')
  @Delete('/')
  @HttpCode(HttpStatus.METHOD_NOT_ALLOWED)
  async STATELESS_MCP_ENDPOINT() {
    return {
      jsonrpc: '2.0',
      error: {
        code: -32000,
        message: 'Method not allowed.',
      },
      id: null,
    };
  }

  /**
   * Handles a single MCP JSON-RPC request for the given workspace.
   *
   * The response is written through the injected `res` object by the MCP
   * transport, so nothing is returned to the framework.
   *
   * @param req incoming request; `req.body` is forwarded to the transport.
   * @param res response object the transport writes to.
   * @param user the authenticated user the MCP server is built for.
   * @param workspaceId workspace taken from the route.
   */
  @Post('/')
  async mcp(
    @Req() req: Request,
    @Res() res: Response,
    @CurrentUser() user: CurrentUser,
    @Param('workspaceId') workspaceId: string
  ) {
    // Deliberately outside the try block: a failure here (e.g. the access
    // assertion inside the provider) propagates to the framework as before.
    const server = await this.provider.for(user.id, workspaceId);
    const transport: StreamableHTTPServerTransport =
      new StreamableHTTPServerTransport({
        sessionIdGenerator: undefined,
      });

    // Cleanup can be reached from both the error path and the 'close' event;
    // the flag makes sure the resources are only released once.
    let released = false;
    const cleanup = () => {
      if (released) {
        return;
      }
      released = true;
      transport.close().catch(e => {
        this.logger.error('Failed to close MCP transport', e);
      });
      server.close().catch(e => {
        this.logger.error('Failed to close MCP server', e);
      });
    };

    res.on('close', cleanup);

    try {
      await server.connect(transport);
      await transport.handleRequest(req, res, req.body);
    } catch (e) {
      this.logger.error('Failed to handle MCP request', e);
      // Because `@Res()` is injected, nobody else will answer the request:
      // reply explicitly so the client does not hang until it times out.
      if (!res.headersSent) {
        res.status(HttpStatus.INTERNAL_SERVER_ERROR).json({
          jsonrpc: '2.0',
          error: {
            code: -32603,
            message: 'Internal server error',
          },
          id: null,
        });
      }
      cleanup();
    }
  }
}

View File

@@ -0,0 +1,170 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { Injectable } from '@nestjs/common';
import { pick } from 'lodash-es';
import z from 'zod';
import { DocReader } from '../../../core/doc';
import { AccessController } from '../../../core/permission';
import { IndexerService } from '../../indexer';
import { CopilotContextService } from '../context';
import { clearEmbeddingChunk } from '../utils';
/**
 * Factory for MCP servers bound to one user and one workspace.
 *
 * The server returned by {@link WorkspaceMcpProvider.for} exposes three tools:
 * `read_document`, `semantic_search` and `keyword_search`. Each tool checks
 * `Doc.Read` for the bound user before returning document data.
 */
@Injectable()
export class WorkspaceMcpProvider {
  constructor(
    private readonly ac: AccessController,
    private readonly reader: DocReader,
    private readonly context: CopilotContextService,
    private readonly indexer: IndexerService
  ) {}

  /**
   * Creates an MCP server for `userId` inside `workspaceId`.
   *
   * Asserts `Workspace.Read` for the user before the server is built, so the
   * returned promise rejects when that assertion fails.
   *
   * @param userId id of the user the tools act on behalf of.
   * @param workspaceId id of the workspace the tools operate on.
   * @returns a server with all tools registered (not yet connected).
   */
  async for(userId: string, workspaceId: string) {
    await this.ac.user(userId).workspace(workspaceId).assert('Workspace.Read');

    const mcp = new McpServer({
      name: `AFFiNE MCP Server for Workspace ${workspaceId}`,
      version: '1.0.0',
    });

    // Registration order matches the original: read, semantic, keyword.
    this.registerReadDocument(mcp, userId, workspaceId);
    this.registerSemanticSearch(mcp, userId, workspaceId);
    this.registerKeywordSearch(mcp, userId, workspaceId);

    return mcp;
  }

  /** Builds a tool result flagged as an error carrying one text message. */
  private failure(text: string): CallToolResult {
    return {
      isError: true,
      content: [{ type: 'text', text }],
    };
  }

  /** Registers `read_document`: returns the markdown of one readable doc. */
  private registerReadDocument(
    mcp: McpServer,
    userId: string,
    workspaceId: string
  ): void {
    mcp.registerTool(
      'read_document',
      {
        title: 'Read Document',
        description: 'Read a document with given ID',
        inputSchema: {
          docId: z.string(),
        },
      },
      async ({ docId }): Promise<CallToolResult> => {
        // The same "not found" answer is used for both inaccessible and
        // missing documents.
        const missing = this.failure(`Doc with id ${docId} not found.`);

        const canRead = await this.ac
          .user(userId)
          .workspace(workspaceId)
          .doc(docId)
          .can('Doc.Read');
        if (!canRead) {
          return missing;
        }

        const rendered = await this.reader.getDocMarkdown(
          workspaceId,
          docId,
          false
        );
        if (!rendered) {
          return missing;
        }

        return {
          content: [{ type: 'text', text: rendered.markdown }],
        };
      }
    );
  }

  /** Registers `semantic_search`: embedding-based lookup over workspace docs. */
  private registerSemanticSearch(
    mcp: McpServer,
    userId: string,
    workspaceId: string
  ): void {
    mcp.registerTool(
      'semantic_search',
      {
        title: 'Semantic Search',
        description:
          'Retrieve conceptually related passages by performing vector-based semantic similarity search across embedded documents; use this tool only when exact keyword search fails or the user explicitly needs meaning-level matches (e.g., paraphrases, synonyms, broader concepts, recent documents).',
        inputSchema: {
          query: z.string(),
        },
      },
      async ({ query }, extra): Promise<CallToolResult> => {
        const needle = query.trim();
        if (!needle) {
          return this.failure('Query is required for semantic search.');
        }

        const matches = await this.context.matchWorkspaceDocs(
          workspaceId,
          needle,
          5,
          extra.signal
        );

        // Keep only chunks that carry a docId, then drop those the user
        // is not allowed to read.
        const readable = await this.ac
          .user(userId)
          .workspace(workspaceId)
          .docs(
            matches.filter(c => 'docId' in c),
            'Doc.Read'
          );

        return {
          content: readable.map(chunk => ({
            type: 'text',
            text: clearEmbeddingChunk(chunk).content,
          })),
        };
      }
    );
  }

  /** Registers `keyword_search`: indexer-backed keyword lookup. */
  private registerKeywordSearch(
    mcp: McpServer,
    userId: string,
    workspaceId: string
  ): void {
    mcp.registerTool(
      'keyword_search',
      {
        title: 'Keyword Search',
        description:
          'Fuzzy search all workspace documents for the exact keyword or phrase supplied and return passages ranked by textual match. Use this tool by default whenever a straightforward term-based or keyword-base lookup is sufficient.',
        inputSchema: {
          query: z.string(),
        },
      },
      async ({ query }): Promise<CallToolResult> => {
        const needle = query.trim();
        if (!needle) {
          return this.failure('Query is required for keyword search.');
        }

        const hits = await this.indexer.searchDocsByKeyword(
          workspaceId,
          needle
        );
        const readable = await this.ac
          .user(userId)
          .workspace(workspaceId)
          .docs(hits, 'Doc.Read');

        // Each hit is serialised as a small JSON object of selected fields.
        return {
          content: readable.map(hit => ({
            type: 'text',
            text: JSON.stringify(pick(hit, 'docId', 'title', 'createdAt')),
          })),
        };
      }
    );
  }
}

View File

@@ -7,34 +7,9 @@ import type { ChunkSimilarity, Models } from '../../../models';
import type { CopilotContextService } from '../context';
import type { ContextSession } from '../context/session';
import type { CopilotChatOptions } from '../providers';
import { clearEmbeddingChunk } from '../utils';
import { toolError } from './error';
// Line prefixes that mark a metadata row at the top of an embedded chunk.
const FILTER_PREFIX = [
  'Title: ',
  'Created at: ',
  'Updated at: ',
  'Created by: ',
  'Updated by: ',
];

/**
 * Strips the leading metadata rows from a chunk's content.
 *
 * Only consecutive rows at the very top are considered, and at most five of
 * them are removed. A chunk with empty content is returned as-is; otherwise a
 * shallow copy with the trimmed content is returned.
 */
function clearEmbeddingChunk(chunk: ChunkSimilarity): ChunkSimilarity {
  if (!chunk.content) {
    return chunk;
  }

  const rows = chunk.content.split('\n');
  const limit = Math.min(5, rows.length);
  const isMetadata = (row: string) =>
    FILTER_PREFIX.some(prefix => row.startsWith(prefix));

  // Count how many leading rows are metadata, stopping at the first other row.
  let skip = 0;
  for (; skip < limit; skip++) {
    if (!isMetadata(rows[skip])) {
      break;
    }
  }

  return { ...chunk, content: rows.slice(skip).join('\n') };
}
export const buildDocSearchGetter = (
ac: AccessController,
context: CopilotContextService,

View File

@@ -1,6 +1,5 @@
import { z } from 'zod';
import { OneMB } from '../../base';
import type { ChatPrompt } from './prompt';
import { PromptMessageSchema, PureMessageSchema } from './providers';
@@ -130,5 +129,3 @@ export type CopilotContextFile = {
// embedding status
status: 'in_progress' | 'completed' | 'failed';
};
export const MAX_EMBEDDABLE_SIZE = 50 * OneMB;

View File

@@ -2,9 +2,12 @@ import { Readable } from 'node:stream';
import type { Request } from 'express';
import { readBufferWithLimit } from '../../base';
import { PromptTools } from './providers';
import { MAX_EMBEDDABLE_SIZE, ToolsConfig } from './types';
import { OneMB, readBufferWithLimit } from '../../base';
import type { ChunkSimilarity } from '../../models';
import type { PromptTools } from './providers';
import type { ToolsConfig } from './types';
export const MAX_EMBEDDABLE_SIZE = 50 * OneMB;
export function readStream(
readable: Readable,
@@ -80,3 +83,29 @@ export function getTools(
});
return result;
}
// Prefixes identifying the metadata rows found at the top of an embedded chunk.
const FILTER_PREFIX = [
  'Title: ',
  'Created at: ',
  'Updated at: ',
  'Created by: ',
  'Updated by: ',
];

/**
 * Removes the metadata header from the content of an embedding chunk.
 *
 * Rows are dropped from the top while they start with one of the
 * `FILTER_PREFIX` entries, up to a maximum of five rows. The first row that is
 * not metadata ends the scan, so matching rows further down are kept.
 *
 * @param chunk the chunk whose content should be cleaned.
 * @returns the same object when its content is empty, otherwise a shallow
 *          copy with the cleaned content.
 */
export function clearEmbeddingChunk(chunk: ChunkSimilarity): ChunkSimilarity {
  if (!chunk.content) {
    return chunk;
  }

  const rows = chunk.content.split('\n');
  let dropped = 0;
  while (
    dropped < 5 &&
    dropped < rows.length &&
    FILTER_PREFIX.some(prefix => rows[dropped].startsWith(prefix))
  ) {
    dropped += 1;
  }

  return { ...chunk, content: rows.slice(dropped).join('\n') };
}

View File

@@ -27,7 +27,7 @@ import { CurrentUser } from '../../../core/auth';
import { AccessController } from '../../../core/permission';
import { WorkspaceType } from '../../../core/workspaces';
import { COPILOT_LOCKER } from '../resolver';
import { MAX_EMBEDDABLE_SIZE } from '../types';
import { MAX_EMBEDDABLE_SIZE } from '../utils';
import { CopilotWorkspaceService } from './service';
import {
CopilotWorkspaceFileType,