From 119cf9442bc64114e4c0aa8b3fcfb56627c61160 Mon Sep 17 00:00:00 2001
From: akumatus <akumatus@gmail.com>
Date: Fri, 23 May 2025 01:58:01 +0000
Subject: [PATCH] feat(core): add gemini callout syntax highlighting (#12413)

Close [AI-125](https://linear.app/affine-design/issue/AI-125)

What Changed?
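- Add the `gemini-2.5-flash-preview-04-17` model and bump Gemini 2.5 Pro to `gemini-2.5-pro-preview-05-06`
- Add `thinkingConfig` provider options for Gemini 2.5 models when reasoning is requested
- Add callout syntax highlighting: streamed reasoning output is emitted as a Markdown callout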

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added support for the "Gemini 2.5 Flash" model and updated the "Gemini 2.5 Pro" model to a newer version.
  - Enhanced streaming responses to better format reasoning outputs and provide clearer callouts in AI-generated content.

- **Bug Fixes**
  - Improved audio transcription prompts in test cases for more accurate and explicit testing.

- **Documentation**
  - Expanded citation instructions for AI chat responses, including examples for multiple citations.

- **Chores**
  - Updated the "@ai-sdk/google" dependency to a newer version.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 packages/backend/server/package.json          |  2 +-
 .../src/__tests__/copilot-provider.spec.ts    |  6 +-
 .../src/__tests__/mocks/copilot.mock.ts       |  2 +-
 .../src/plugins/copilot/prompt/prompts.ts     | 10 +-
 .../plugins/copilot/providers/anthropic.ts    |  5 +-
 .../src/plugins/copilot/providers/gemini.ts   | 91 +++++++++++++++++--
 yarn.lock                                     |  4 +-
 7 files changed, 101 insertions(+), 19 deletions(-)

diff --git a/packages/backend/server/package.json b/packages/backend/server/package.json
index 91209eb1b1..2476bd800c 100644
--- a/packages/backend/server/package.json
+++ b/packages/backend/server/package.json
@@ -29,7 +29,7 @@
     "@affine/reader": "workspace:*",
     "@affine/server-native": "workspace:*",
     "@ai-sdk/anthropic": "^1.2.10",
-    "@ai-sdk/google": "^1.2.10",
+    "@ai-sdk/google": "^1.2.18",
     "@ai-sdk/openai": "^1.3.21",
     "@ai-sdk/perplexity": "^1.1.6",
     "@apollo/server": "^4.11.3",
diff --git a/packages/backend/server/src/__tests__/copilot-provider.spec.ts b/packages/backend/server/src/__tests__/copilot-provider.spec.ts
index e0ac21c0a1..29cea6ced7 100644
--- a/packages/backend/server/src/__tests__/copilot-provider.spec.ts
+++ b/packages/backend/server/src/__tests__/copilot-provider.spec.ts
@@ -328,7 +328,7 @@ const actions = [
     messages: [
       {
         role: 'user' as const,
-        content: '',
+        content: 'transcript the audio',
         attachments: [
           'https://cdn.affine.pro/copilot-test/MP9qDGuYgnY+ILoEAmHpp3h9Npuw2403EAYMEA.mp3',
         ],
@@ -350,7 +350,7 @@ const actions = [
     messages: [
       {
         role: 'user' as const,
-        content: '',
+        content: 'transcript the audio',
         attachments: [
           'https://cdn.affine.pro/copilot-test/2ed05eo1KvZ2tWB_BAjFo67EAPZZY-w4LylUAw.m4a',
         ],
@@ -372,7 +372,7 @@ const actions = [
     messages: [
       {
         role: 'user' as const,
-        content: '',
+        content: 'transcript the audio',
         attachments: [
           'https://cdn.affine.pro/copilot-test/nC9-e7P85PPI2rU29QWwf8slBNRMy92teLIIMw.opus',
         ],
diff --git a/packages/backend/server/src/__tests__/mocks/copilot.mock.ts b/packages/backend/server/src/__tests__/mocks/copilot.mock.ts
index a4cb5bd93f..2f30df562f 100644
--- a/packages/backend/server/src/__tests__/mocks/copilot.mock.ts
+++ b/packages/backend/server/src/__tests__/mocks/copilot.mock.ts
@@ -111,7 +111,7 @@ export class MockCopilotProvider extends OpenAIProvider {
       ],
     },
     {
-      id: 'gemini-2.5-pro-preview-03-25',
+      id: 'gemini-2.5-pro-preview-05-06',
       capabilities: [
         {
           input: [ModelInputType.Text, ModelInputType.Image],
diff --git a/packages/backend/server/src/plugins/copilot/prompt/prompts.ts b/packages/backend/server/src/plugins/copilot/prompt/prompts.ts
index 6c7a1f5a7c..21df22fa99 100644
--- a/packages/backend/server/src/plugins/copilot/prompt/prompts.ts
+++ b/packages/backend/server/src/plugins/copilot/prompt/prompts.ts
@@ -350,7 +350,7 @@ const actions: Prompt[] = [
   {
     name: 'Transcript audio',
     action: 'Transcript audio',
-    model: 'gemini-2.5-pro-preview-03-25',
+    model: 'gemini-2.5-pro-preview-05-06',
     messages: [
       {
         role: 'system',
@@ -1096,6 +1096,8 @@ const chat: Prompt[] = [
       'o4-mini',
       'claude-3-7-sonnet-20250219',
       'claude-3-5-sonnet-20241022',
+      'gemini-2.5-flash-preview-04-17',
+      'gemini-2.5-pro-preview-05-06',
     ],
     messages: [
       {
@@ -1122,11 +1124,12 @@ When referencing information from the provided documents, files or web search re
 1. Use markdown footnote format for citations
 2. Add citations immediately after the relevant sentence or paragraph
 3. Required format: [^reference_index] where reference_index is an increasing positive integer
-4. You MUST include citations at the end of your response in this exact format:
+4. When a single sentence needs multiple citations, write each marker in its own pair of brackets and place them consecutively. Correct: [^2][^4][^12], Incorrect: [^2, 4, 12].
+5. You MUST include citations at the end of your response in this exact format:
   - For documents: [^reference_index]:{"type":"doc","docId":"document_id"}
   - For files: [^reference_index]:{"type":"attachment","blobId":"blob_id","fileName":"file_name","fileType":"file_type"}
   - For web search results: [^reference_index]:{"type":"url","url":"url_path"}
-5. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index].
+6. Ensure citations adhere strictly to the required format. Do not add extra spaces in citations like [^ reference_index] or [ ^reference_index].
 
 ### Citations Structure
 Your response MUST follow this structure:
@@ -1136,6 +1139,7 @@ Your response MUST follow this structure:
 
 Example Output with Citations:
 This is my response with a document citation[^1]. Here is more content with another file citation[^2]. And here is a web search result citation[^3].
+Here is multiple citations: [^1][^2][^3].
 
 [^1]:{"type":"doc","docId":"abc123"}
 [^2]:{"type":"attachment","blobId":"xyz789","fileName":"example.txt","fileType":"text"}
diff --git a/packages/backend/server/src/plugins/copilot/providers/anthropic.ts b/packages/backend/server/src/plugins/copilot/providers/anthropic.ts
index cba0b3b598..a94f440159 100644
--- a/packages/backend/server/src/plugins/copilot/providers/anthropic.ts
+++ b/packages/backend/server/src/plugins/copilot/providers/anthropic.ts
@@ -232,7 +232,7 @@ export class AnthropicProvider extends CopilotProvider<AnthropicConfig> {
 
   private getAnthropicOptions(options: CopilotChatOptions, model: string) {
     const result: AnthropicProviderOptions = {};
-    if (options?.reasoning && this.isThinkingModel(model)) {
+    if (options?.reasoning && this.isReasoningModel(model)) {
       result.thinking = {
         type: 'enabled',
         budgetTokens: 12000,
@@ -257,7 +257,8 @@ export class AnthropicProvider extends CopilotProvider<AnthropicConfig> {
     return text.replaceAll('\n', '\n> ');
   }
 
-  private isThinkingModel(model: string) {
+  private isReasoningModel(model: string) {
+    // only claude 3.7 sonnet supports reasoning config
     return model.startsWith('claude-3-7-sonnet');
   }
 }
diff --git a/packages/backend/server/src/plugins/copilot/providers/gemini.ts b/packages/backend/server/src/plugins/copilot/providers/gemini.ts
index d0d19397b2..5604363418 100644
--- a/packages/backend/server/src/plugins/copilot/providers/gemini.ts
+++ b/packages/backend/server/src/plugins/copilot/providers/gemini.ts
@@ -1,6 +1,7 @@
 import {
   createGoogleGenerativeAI,
   type GoogleGenerativeAIProvider,
+  type GoogleGenerativeAIProviderOptions,
 } from '@ai-sdk/google';
 import {
   AISDKError,
@@ -52,9 +53,23 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
         },
       ],
     },
+    {
+      name: 'Gemini 2.5 Flash',
+      id: 'gemini-2.5-flash-preview-04-17',
+      capabilities: [
+        {
+          input: [
+            ModelInputType.Text,
+            ModelInputType.Image,
+            ModelInputType.Audio,
+          ],
+          output: [ModelOutputType.Text, ModelOutputType.Structured],
+        },
+      ],
+    },
     {
       name: 'Gemini 2.5 Pro',
-      id: 'gemini-2.5-pro-preview-03-25',
+      id: 'gemini-2.5-pro-preview-05-06',
       capabilities: [
         {
           input: [
@@ -78,6 +93,10 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
     },
   ];
 
+  private readonly MAX_STEPS = 20;
+
+  private readonly CALLOUT_PREFIX = '\n> [!]\n> ';
+
   #instance!: GoogleGenerativeAIProvider;
 
   override configured(): boolean {
@@ -203,20 +222,55 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
       metrics.ai.counter('chat_text_stream_calls').add(1, { model: model.id });
       const [system, msgs] = await chatToGPTMessage(messages);
 
-      const { textStream } = streamText({
-        model: this.#instance(model.id),
+      const { fullStream } = streamText({
+        model: this.#instance(model.id, {
+          useSearchGrounding: this.useSearchGrounding(options),
+        }),
         system,
         messages: msgs,
         abortSignal: options.signal,
+        maxSteps: this.MAX_STEPS,
+        providerOptions: {
+          google: this.getGeminiOptions(options, model.id),
+        },
       });
 
-      for await (const message of textStream) {
-        if (message) {
-          yield message;
+      let lastType;
+      // reasoning, tool-call, tool-result need to mark as callout
+      let prefix: string | null = this.CALLOUT_PREFIX;
+      for await (const chunk of fullStream) {
+        if (chunk) {
+          switch (chunk.type) {
+            case 'text-delta': {
+              let result = chunk.textDelta;
+              if (lastType !== chunk.type) {
+                result = '\n\n' + result;
+              }
+              yield result;
+              break;
+            }
+            case 'reasoning': {
+              if (prefix) {
+                yield prefix;
+                prefix = null;
+              }
+              let result = chunk.textDelta;
+              if (lastType !== chunk.type) {
+                result = '\n\n' + result;
+              }
+              yield this.markAsCallout(result);
+              break;
+            }
+            case 'error': {
+              const error = chunk.error as { type: string; message: string };
+              throw new Error(error.message);
+            }
+          }
           if (options.signal?.aborted) {
-            await textStream.cancel();
+            await fullStream.cancel();
             break;
           }
+          lastType = chunk.type;
         }
       }
     } catch (e: any) {
@@ -224,4 +278,27 @@ export class GeminiProvider extends CopilotProvider<GeminiConfig> {
       throw this.handleError(e);
     }
   }
+
+  private getGeminiOptions(options: CopilotChatOptions, model: string) {
+    const result: GoogleGenerativeAIProviderOptions = {};
+    if (options?.reasoning && this.isReasoningModel(model)) {
+      result.thinkingConfig = {
+        thinkingBudget: 12000,
+        includeThoughts: true,
+      };
+    }
+    return result;
+  }
+
+  private markAsCallout(text: string) {
+    return text.replaceAll('\n', '\n> ');
+  }
+
+  private isReasoningModel(model: string) {
+    return model.startsWith('gemini-2.5');
+  }
+
+  private useSearchGrounding(options: CopilotChatOptions) {
+    return options?.tools?.includes('webSearch');
+  }
 }
diff --git a/yarn.lock b/yarn.lock
index a4494e69f7..dfa162efff 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -908,7 +908,7 @@ __metadata:
     "@affine/reader": "workspace:*"
     "@affine/server-native": "workspace:*"
     "@ai-sdk/anthropic": "npm:^1.2.10"
-    "@ai-sdk/google": "npm:^1.2.10"
+    "@ai-sdk/google": "npm:^1.2.18"
     "@ai-sdk/openai": "npm:^1.3.21"
     "@ai-sdk/perplexity": "npm:^1.1.6"
     "@apollo/server": "npm:^4.11.3"
@@ -1083,7 +1083,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"@ai-sdk/google@npm:^1.2.10":
+"@ai-sdk/google@npm:^1.2.18":
   version: 1.2.18
   resolution: "@ai-sdk/google@npm:1.2.18"
   dependencies: