feat(core): add exa url crawl tool (#12277)

Close [AI-126](https://linear.app/affine-design/issue/AI-126)

![截屏2025-05-14 17.01.19.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/sJGviKxfE3Ap685cl5bj/1a86ac68-f9f1-4740-8ddb-2293838682d2.png)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Introduced a new web crawling tool, allowing users to extract live content from specific web pages in addition to traditional web search.
- **Improvements**
  - Enhanced error handling for web search and web crawl operations, providing clearer failure messages.
  - Updated terminology in AI prompts and user-facing messages to reflect the new web search/crawl capabilities.
  - Improved formatting of web search and crawl results for better readability.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
akumatus
2025-05-15 05:22:55 +00:00
parent fcc9b31da9
commit fabcdd3b2c
4 changed files with 80 additions and 24 deletions

View File

@@ -1077,9 +1077,9 @@ const chat: Prompt[] = [
content: `You are AFFiNE AI, a professional and humorous copilot within AFFiNE. You are powered by latest GPT model from OpenAI and AFFiNE. AFFiNE is an open source general purposed productivity tool that contains unified building blocks that users can use on any interfaces, including block-based docs editor, infinite canvas based edgeless graphic mode, or multi-dimensional table with multiple transformable views. Your mission is always to try your very best to assist users to use AFFiNE to write docs, draw diagrams or plan things with these abilities. You always think step-by-step and describe your plan for what to build, using well-structured and clear markdown, written out in great detail. Unless otherwise specified, where list, JSON, or code blocks are required for giving the output. Minimize any other prose so that your responses can be directly used and inserted into the docs. You are able to access to API of AFFiNE to finish your job. You always respect the users' privacy and would not leak their info to anyone else. AFFiNE is made by Toeverything .Pte .Ltd, a company registered in Singapore with a diverse and international team. The company also open sourced blocksuite and octobase for building tools similar to Affine. The name AFFiNE comes from the idea of AFFiNE transform, as blocks in affine can all transform in page, edgeless or database mode. AFFiNE team is now having 25 members, an open source company driven by engineers. Today is: {{affine::date}}, User's preferred language is {{affine::language}}.
# Response Guide
Use the web search tool to gather information from the web if you have been equipped with it. There are two modes for web searching:
- MUST: Means you always need to use the web search tool to gather information from the web, no matter what the user's query is.
- AUTO: Indicates that web searching is optional - you may use the web search tool at your discretion when you determine it would provide valuable information for answering the user's query. If your own knowledge can directly answer the user's questions, there is no need to use web search tool.
Use the web search/crawl tool to gather information from the web if you have been equipped with it. There are two modes for web searching:
- MUST: Means you always need to use the web search/crawl tool to gather information from the web, no matter what the user's query is.
- AUTO: Indicates that web searching is optional - you may use the web search/crawl tool at your discretion when you determine it would provide valuable information for answering the user's query. If your own knowledge can directly answer the user's questions, there is no need to use web search/crawl tool.
Currently, you are in the {{searchMode}} web searching mode.
I will provide you with some content fragments. There are two types of content fragments:

View File

@@ -11,7 +11,7 @@ import {
metrics,
UserFriendlyError,
} from '../../../base';
import { createExaSearchTool } from '../tools';
import { createExaCrawlTool, createExaSearchTool } from '../tools';
import { CopilotProvider } from './provider';
import {
ChatMessageRole,
@@ -207,15 +207,23 @@ export class AnthropicProvider
yield prefix;
prefix = null;
}
if (chunk.toolName === 'web_search') {
if (chunk.toolName === 'web_search_exa') {
yield this.markAsCallout(
`\nSearching the web "${chunk.args.query}"\n`
);
}
if (chunk.toolName === 'web_crawl_exa') {
yield this.markAsCallout(
`\nCrawling the web "${chunk.args.url}"\n`
);
}
break;
}
case 'tool-result': {
if (chunk.toolName === 'web_search') {
if (
chunk.toolName === 'web_search_exa' &&
Array.isArray(chunk.result)
) {
if (prefix) {
yield prefix;
prefix = null;
@@ -243,7 +251,8 @@ export class AnthropicProvider
private getTools() {
return {
web_search: createExaSearchTool(this.AFFiNEConfig),
web_search_exa: createExaSearchTool(this.AFFiNEConfig),
web_crawl_exa: createExaCrawlTool(this.AFFiNEConfig),
};
}

View File

@@ -19,7 +19,7 @@ import {
metrics,
UserFriendlyError,
} from '../../../base';
import { createExaSearchTool } from '../tools';
import { createExaCrawlTool, createExaSearchTool } from '../tools';
import { CopilotProvider } from './provider';
import {
ChatMessageRole,
@@ -46,6 +46,7 @@ export type OpenAIConfig = {
/**
 * Tool map used by the OpenAI provider. Each key is the tool name that the
 * streaming code later matches against `chunk.toolName`.
 */
type OpenAITools = {
// Web search preview tool obtained from `openai.tools.webSearchPreview()`.
web_search_preview: ReturnType<typeof openai.tools.webSearchPreview>;
// Exa-backed web search tool produced by `createExaSearchTool`.
web_search_exa: ReturnType<typeof createExaSearchTool>;
// Exa-backed URL crawl tool produced by `createExaCrawlTool`.
web_crawl_exa: ReturnType<typeof createExaCrawlTool>;
};
export class OpenAIProvider
@@ -202,6 +203,7 @@ export class OpenAIProvider
// o series reasoning models
if (model.startsWith('o')) {
tools.web_search_exa = createExaSearchTool(this.AFFiNEConfig);
tools.web_crawl_exa = createExaCrawlTool(this.AFFiNEConfig);
} else {
tools.web_search_preview = openai.tools.webSearchPreview();
}
@@ -330,10 +332,18 @@ export class OpenAIProvider
`\nSearching the web "${chunk.args.query}"\n`
);
}
if (chunk.toolName === 'web_crawl_exa') {
yield this.markAsCallout(
`\nCrawling the web "${chunk.args.url}"\n`
);
}
break;
}
case 'tool-result': {
if (chunk.toolName === 'web_search_exa') {
if (
chunk.toolName === 'web_search_exa' &&
Array.isArray(chunk.result)
) {
yield this.markAsCallout(
`\n${this.getWebSearchLinks(chunk.result)}\n`
);

View File

@@ -14,21 +14,58 @@ export const createExaSearchTool = (config: Config) => {
.describe('The mode to search the web for.'),
}),
execute: async ({ query, mode }) => {
const { key } = config.copilot.exa;
const exa = new Exa(key);
const result = await exa.searchAndContents(query, {
numResults: 10,
summary: true,
livecrawl: mode === 'MUST' ? 'always' : undefined,
});
return result.results.map(data => ({
title: data.title,
url: data.url,
summary: data.summary,
favicon: data.favicon,
publishedDate: data.publishedDate,
author: data.author,
}));
try {
const { key } = config.copilot.exa;
const exa = new Exa(key);
const result = await exa.searchAndContents(query, {
numResults: 10,
summary: true,
livecrawl: mode === 'MUST' ? 'always' : undefined,
});
return result.results.map(data => ({
title: data.title,
url: data.url,
content: data.summary,
favicon: data.favicon,
publishedDate: data.publishedDate,
author: data.author,
}));
} catch {
return 'Failed to search the web';
}
},
});
};
/**
 * Creates the AI tool that fetches the contents of a single web page via Exa.
 *
 * The tool accepts one `url` argument and asks Exa for that page's text with
 * `livecrawl: 'always'` and a `maxCharacters` limit of 100000 (presumably a
 * cap on the returned text size — confirm against the Exa SDK docs).
 *
 * @param config - Application config; the Exa API key is read from
 *   `config.copilot.exa.key` each time the tool executes.
 * @returns A tool whose `execute` resolves to an array of
 *   `{ title, url, content, favicon, publishedDate, author }` records, or to
 *   the plain string `'Failed to crawl the web url'` if anything throws.
 *   Callers should check `Array.isArray` on the result before iterating.
 */
export const createExaCrawlTool = (config: Config) =>
  tool({
    description: 'Crawl the web url for information',
    parameters: z.object({
      url: z
        .string()
        .describe('The URL to crawl (including http:// or https://)'),
    }),
    execute: async ({ url }) => {
      try {
        // Key lookup stays inside the try so a missing/invalid config is
        // reported to the model the same way as a failed request.
        const client = new Exa(config.copilot.exa.key);
        const response = await client.getContents([url], {
          livecrawl: 'always',
          text: { maxCharacters: 100000 },
        });
        // Expose the page text under `content`, the same field name the
        // search tool uses for its summaries.
        return response.results.map(
          ({ title, url: pageUrl, text, favicon, publishedDate, author }) => ({
            title,
            url: pageUrl,
            content: text,
            favicon,
            publishedDate,
            author,
          })
        );
      } catch {
        // Best-effort: hand the model a readable failure message instead of
        // propagating the error out of the tool call.
        return 'Failed to crawl the web url';
      }
    },
  });