feat(server): add document write tools for mcp (#14245)

## Summary

This PR adds write capabilities to AFFiNE's MCP (Model Context Protocol)
integration, enabling external tools (Claude, GPT, etc.) to create and
modify documents programmatically.

**New MCP Tools:**
- `create_document` - Create new documents from markdown content
- `update_document` - Update document content using structural diffing
for minimal changes (preserves document history and enables real-time
collaboration)

**Implementation:**
- `markdown_to_ydoc.rs` - Converts markdown to AFFiNE-compatible y-octo
binary format
- `markdown_utils.rs` - Shared markdown parsing utilities (used by both
ydoc-to-md and md-to-ydoc)
- `update_ydoc.rs` - Structural diffing implementation for updating
existing documents
- `DocWriter` service - TypeScript service for document operations
- Exposes `markdownToDocBinary`, `updateDocWithMarkdown`, and `addDocToRootDoc` via napi bindings

**Supported Markdown Elements:**
- Headings (H1-H6)
- Paragraphs
- Bullet lists and numbered lists
- Code blocks (with language detection)
- Blockquotes
- Horizontal dividers
- Todo items (checkboxes)

**y-octo Changes:**
This PR reverts the y-octo sync (ca2462f, a5b60cf) which introduced a
concurrency bug causing hangs when creating documents with many nested
block structures. It also ports the improved `get_node_index` binary
search fix from upstream that prevents divide-by-zero panics when
decoding documents.

## Test Results 

### Unit Tests (47/47 passing)

| Test Suite | Tests | Status |
|------------|-------|--------|
| markdown_to_ydoc | 16/16 |  Pass |
| markdown_utils | 11/11 |  Pass |
| update_ydoc | 13/13 |  Pass |
| delta_markdown | 2/2 |  Pass |
| affine (doc parser) | 5/5 |  Pass |

### End-to-End MCP Testing 

Tested against local AFFiNE server with real MCP client requests:

| Tool | Result | Notes |
|------|--------|-------|
| `tools/list` |  Pass | Returns all 5 tools with correct schemas |
| `create_document` |  Pass | Successfully created test documents |
| `update_document` |  Pass | Successfully updated documents with
structural diffing |
| `read_document` |  Pass | Existing tool, works correctly |
| `keyword_search` |  Pass | Existing tool, works correctly |

**E2E Test Details:**
- Started local AFFiNE server with PostgreSQL, Redis, and Manticore
- Created test user and workspace via seed/GraphQL
- Verified MCP endpoint at `/api/workspaces/:workspaceId/mcp`
- Tested JSON-RPC calls with proper SSE streaming
- Confirmed documents are stored and indexed correctly (verified via
server logs)

## Test Plan
- [x] All Rust unit tests pass (47 tests)
- [x] Native bindings build successfully (release mode)
- [x] Document creation via MCP works end-to-end
- [x] Document update via MCP works end-to-end
- [x] CodeRabbit feedback addressed
- [ ] Integration testing with Claude/GPT MCP clients

Closes #14161

---

**Requested by:** @realies  
**Key guidance from:** @darkskygit (use y-octo instead of yjs for memory
efficiency)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Create documents from Markdown: generate new documents directly from
Markdown content with automatic title extraction
* Update documents with Markdown: modify existing documents using
Markdown as the source with automatic diff calculation for efficient
updates
* Copilot integration: new tools for document creation and updates
through Copilot's interface

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
realies
2026-01-16 14:57:24 +02:00
committed by GitHub
parent 2c5559ed0b
commit 0da91e406e
14 changed files with 2585 additions and 4 deletions

View File

@@ -11,6 +11,7 @@ import { DocEventsListener } from './event';
import { DocStorageCronJob } from './job';
import { DocStorageOptions } from './options';
import { DatabaseDocReader, DocReader, DocReaderProvider } from './reader';
import { DocWriter } from './writer';
@Module({
imports: [QuotaModule, PermissionModule, StorageModule],
@@ -22,10 +23,12 @@ import { DatabaseDocReader, DocReader, DocReaderProvider } from './reader';
DocReaderProvider,
DatabaseDocReader,
DocEventsListener,
DocWriter,
],
exports: [
DatabaseDocReader,
DocReader,
DocWriter,
PgWorkspaceDocStorageAdapter,
PgUserspaceDocStorageAdapter,
],
@@ -35,6 +38,7 @@ export {
// only for doc-service
DatabaseDocReader,
DocReader,
DocWriter,
PgUserspaceDocStorageAdapter,
PgWorkspaceDocStorageAdapter,
};

View File

@@ -0,0 +1,131 @@
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { nanoid } from 'nanoid';
import {
addDocToRootDoc,
markdownToDocBinary,
updateDocWithMarkdown,
} from '../../native';
import { PgWorkspaceDocStorageAdapter } from './adapters/workspace';
/**
 * Result of `DocWriter.createDoc`.
 */
export interface CreateDocResult {
// ID generated for the newly created document.
docId: string;
}
/**
 * Result of `DocWriter.updateDoc`.
 */
export interface UpdateDocResult {
// `updateDoc` returns `true` here once the delta has been pushed; failures
// surface as thrown exceptions rather than `false`.
success: boolean;
}
/**
 * Matches the first ATX level-1 heading line (`# Title`) in a markdown string.
 *
 * - Only spaces/tabs may separate `#` from the text, so a bare `#` line can
 *   never pull the following line in as the title.
 * - An optional closing sequence (`# Title ##`) is dropped, but only when it
 *   is preceded by whitespace, so titles such as `C#` stay intact.
 */
const FIRST_H1_PATTERN = /^#[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$/m;

/**
 * Extracts the text of the first H1 heading from markdown.
 *
 * @param markdown - Markdown source to scan
 * @returns The trimmed heading text, or `undefined` when there is no H1 or
 * its text is empty
 */
function extractTitle(markdown: string): string | undefined {
  const title = FIRST_H1_PATTERN.exec(markdown)?.[1]?.trim();
  return title ? title : undefined;
}

/**
 * Returns a `Buffer` for the given bytes for handing to the native bindings.
 *
 * A value that already is a `Buffer` is returned as-is; otherwise a `Buffer`
 * view over the same underlying memory is created (no copy).
 *
 * @param bin - Stored document binary
 */
function toBuffer(bin: Uint8Array): Buffer {
  return Buffer.isBuffer(bin)
    ? bin
    : Buffer.from(bin.buffer, bin.byteOffset, bin.byteLength);
}

/**
 * Service that creates and updates workspace documents from markdown by
 * converting it with the native bindings and pushing the resulting updates
 * through the workspace doc storage adapter.
 */
@Injectable()
export class DocWriter {
  private readonly logger = new Logger(DocWriter.name);

  /**
   * @param storage - Workspace doc storage used to read docs and push updates
   */
  constructor(private readonly storage: PgWorkspaceDocStorageAdapter) {}

  /**
   * Creates a new document from markdown content.
   *
   * @param workspaceId - The workspace ID
   * @param markdown - The markdown content
   * @param editorId - Optional editor ID for tracking
   * @returns The created document ID
   * @throws NotFoundException when the workspace root doc is missing or empty
   */
  async createDoc(
    workspaceId: string,
    markdown: string,
    editorId?: string
  ): Promise<CreateDocResult> {
    // Fetch workspace root doc first - reject if not found
    // The root doc (docId = workspaceId) contains meta.pages array
    const rootDoc = await this.storage.getDoc(workspaceId, workspaceId);
    if (!rootDoc?.bin) {
      throw new NotFoundException(
        `Workspace ${workspaceId} not found or has no root document`
      );
    }
    const rootDocBin = toBuffer(rootDoc.bin);

    const docId = nanoid();
    this.logger.debug(
      `Creating doc ${docId} in workspace ${workspaceId} from markdown`
    );

    // Convert markdown to y-octo binary
    const binary = markdownToDocBinary(markdown, docId);

    // Extract title from markdown (first H1 heading)
    const title = extractTitle(markdown);

    // Prepare root doc update to register the new document
    const rootDocUpdate = addDocToRootDoc(rootDocBin, docId, title);

    // Register the doc in the root doc first, then store its content.
    // These are two separate writes, not one atomic operation.
    // NOTE(review): if the second push fails, the root doc references a doc
    // with no content - confirm this is acceptable or reorder the writes.
    await this.storage.pushDocUpdates(
      workspaceId,
      workspaceId,
      [rootDocUpdate],
      editorId
    );
    await this.storage.pushDocUpdates(workspaceId, docId, [binary], editorId);

    this.logger.debug(
      `Created and registered doc ${docId} in workspace ${workspaceId}`
    );
    return { docId };
  }

  /**
   * Updates an existing document with new markdown content.
   *
   * Uses structural diffing to compute minimal changes between the existing
   * document and new markdown, then applies only the delta. This preserves
   * document history and enables proper CRDT merging with concurrent edits.
   *
   * @param workspaceId - The workspace ID
   * @param docId - The document ID to update
   * @param markdown - The new markdown content
   * @param editorId - Optional editor ID for tracking
   * @returns `{ success: true }` once the delta has been pushed
   * @throws NotFoundException when the document is missing or empty
   */
  async updateDoc(
    workspaceId: string,
    docId: string,
    markdown: string,
    editorId?: string
  ): Promise<UpdateDocResult> {
    this.logger.debug(
      `Updating doc ${docId} in workspace ${workspaceId} from markdown`
    );

    // Fetch existing document
    const existingDoc = await this.storage.getDoc(workspaceId, docId);
    if (!existingDoc?.bin) {
      throw new NotFoundException(`Document ${docId} not found`);
    }

    // Compute delta update using structural diff
    const delta = updateDocWithMarkdown(
      toBuffer(existingDoc.bin),
      markdown,
      docId
    );

    // Push only the delta changes
    await this.storage.pushDocUpdates(workspaceId, docId, [delta], editorId);
    return { success: true };
  }
}

View File

@@ -49,3 +49,8 @@ export const readAllDocIdsFromRootDoc =
export const AFFINE_PRO_PUBLIC_KEY = serverNativeModule.AFFINE_PRO_PUBLIC_KEY;
export const AFFINE_PRO_LICENSE_AES_KEY =
serverNativeModule.AFFINE_PRO_LICENSE_AES_KEY;
// MCP write tools exports
// Re-exports of native bindings consumed by the DocWriter service.
// Called as (markdown, docId); the result is pushed as the new doc's update.
export const markdownToDocBinary = serverNativeModule.markdownToDocBinary;
// Called as (existingDocBinary, markdown, docId); the result is pushed as a
// delta update onto the existing doc.
export const updateDocWithMarkdown = serverNativeModule.updateDocWithMarkdown;
// Called as (rootDocBinary, docId, title?); the result is pushed as an update
// to the workspace root doc to register the new doc.
export const addDocToRootDoc = serverNativeModule.addDocToRootDoc;

View File

@@ -4,7 +4,7 @@ import { Injectable } from '@nestjs/common';
import { pick } from 'lodash-es';
import z from 'zod/v3';
import { DocReader } from '../../../core/doc';
import { DocReader, DocWriter } from '../../../core/doc';
import { AccessController } from '../../../core/permission';
import { clearEmbeddingChunk } from '../../../models';
import { IndexerService } from '../../indexer';
@@ -15,6 +15,7 @@ export class WorkspaceMcpProvider {
/**
 * Dependencies are supplied via constructor parameter properties.
 *
 * @param ac - Access controller used for the permission checks of the tools
 * @param reader - Document reader (presumably backing the read tools; its
 * usage is outside this view)
 * @param writer - Document writer backing `create_document` / `update_document`
 * @param context - Copilot context service (usage is outside this view)
 * @param indexer - Indexer service (usage is outside this view)
 */
constructor(
private readonly ac: AccessController,
private readonly reader: DocReader,
private readonly writer: DocWriter,
private readonly context: CopilotContextService,
private readonly indexer: IndexerService
) {}
@@ -165,6 +166,147 @@ export class WorkspaceMcpProvider {
}
);
// Write tools - create and update documents
// Registers the `create_document` MCP tool: builds markdown from the given
// title and body, then creates the doc through DocWriter. Any failure
// (including a denied permission check) is reported as a tool error result.
server.registerTool(
  'create_document',
  {
    title: 'Create Document',
    description:
      'Create a new document in the workspace with the given title and markdown content. Returns the ID of the created document.',
    inputSchema: z.object({
      title: z.string().min(1).describe('The title of the new document'),
      content: z
        .string()
        .describe(
          'The markdown content for the document body (should NOT include a title H1 - the title parameter will be used)'
        ),
    }),
  },
  async ({ title, content }) => {
    try {
      // Check if user can create docs in this workspace
      await this.ac
        .user(userId)
        .workspace(workspaceId)
        .assert('Workspace.CreateDoc');

      // Combine title and content into markdown
      // Sanitize title by removing newlines and trimming
      const sanitizedTitle = title.replace(/[\r\n]+/g, ' ').trim();
      if (!sanitizedTitle) {
        throw new Error('Title cannot be empty');
      }

      // Strip a leading H1 line from content to prevent duplicate titles.
      // Handles: "# Title", " # Title", "# Title #", and a bare "#".
      // The pattern is deliberately line-bound:
      // - only a space/tab may follow `#`, so a bare "#" line does not
      //   swallow the paragraph after it;
      // - after the heading only fully blank lines are removed, so the
      //   indentation of the first body line (e.g. an indented code block)
      //   is preserved.
      const strippedContent = content.replace(
        /^[ \t]{0,3}#(?:[ \t][^\n]*)?(?:\r?\n|$)(?:[ \t]*\r?\n)*/,
        ''
      );
      const markdown = `# ${sanitizedTitle}\n\n${strippedContent}`;

      // Create the document
      const result = await this.writer.createDoc(
        workspaceId,
        markdown,
        userId
      );

      return {
        content: [
          {
            type: 'text',
            text: JSON.stringify({
              success: true,
              docId: result.docId,
              // Report the title that was actually written to the document.
              message: `Document "${sanitizedTitle}" created successfully`,
            }),
          },
        ],
      } as const;
    } catch (error) {
      return {
        isError: true,
        content: [
          {
            type: 'text',
            text: `Failed to create document: ${error instanceof Error ? error.message : 'Unknown error'}`,
          },
        ],
      };
    }
  }
);
// Registers the `update_document` MCP tool: checks `Doc.Update` permission,
// then replaces the doc's content from markdown through DocWriter.
server.registerTool(
  'update_document',
  {
    title: 'Update Document',
    description:
      'Update an existing document with new markdown content. Uses structural diffing to apply minimal changes, preserving document history and enabling real-time collaboration.',
    inputSchema: z.object({
      docId: z.string().describe('The ID of the document to update'),
      content: z
        .string()
        .describe(
          'The complete new markdown content for the document (including title as H1)'
        ),
    }),
  },
  async ({ docId, content }) => {
    // can() is used rather than assert() so that a missing permission is
    // answered exactly like a missing doc and does not reveal doc existence.
    const canUpdate = await this.ac
      .user(userId)
      .workspace(workspaceId)
      .doc(docId)
      .can('Doc.Update');

    if (!canUpdate) {
      const notFound: CallToolResult = {
        isError: true,
        content: [
          {
            type: 'text',
            text: `Doc with id ${docId} not found.`,
          },
        ],
      };
      return notFound;
    }

    try {
      // Apply the new markdown to the document
      await this.writer.updateDoc(workspaceId, docId, content, userId);

      const payload = JSON.stringify({
        success: true,
        docId,
        message: `Document updated successfully`,
      });
      return {
        content: [
          {
            type: 'text',
            text: payload,
          },
        ],
      } as const;
    } catch (error) {
      // Writer failures are returned as a tool error result, not rethrown
      const reason = error instanceof Error ? error.message : 'Unknown error';
      return {
        isError: true,
        content: [
          {
            type: 'text',
            text: `Failed to update document: ${reason}`,
          },
        ],
      };
    }
  }
);
return server;
}
}