From b2654ffec775a24fcab9d49b0da1b3e4a9248203 Mon Sep 17 00:00:00 2001 From: akumatus Date: Tue, 18 Feb 2025 02:33:49 +0000 Subject: [PATCH] feat(core): add 100k tokens limit for the docs context (#10211) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support issue [BS-2352](https://linear.app/affine-design/issue/BS-2352). ![截屏2025-02-16 21.32.16.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/sJGviKxfE3Ap685cl5bj/813b2f67-918b-4c61-ba2e-b5066792a4fd.png) --- .../presets/ai/chat-panel/chat-context.ts | 1 + .../presets/ai/chat-panel/chat-panel-chips.ts | 41 +++++++++++++++++-- .../ai/chat-panel/components/doc-chip.ts | 33 ++++++++++----- .../presets/ai/chat-panel/components/utils.ts | 11 +++++ .../blocksuite/presets/ai/utils/extract.ts | 7 +--- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-context.ts b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-context.ts index 8a2a955c67..e1c077c8e7 100644 --- a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-context.ts +++ b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-context.ts @@ -78,6 +78,7 @@ export interface BaseChip { export interface DocChip extends BaseChip { docId: string; markdown?: Signal; + tokenCount?: number; } export interface FileChip extends BaseChip { diff --git a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-panel-chips.ts b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-panel-chips.ts index e051505ad5..877e141ee7 100644 --- a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-panel-chips.ts +++ b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/chat-panel-chips.ts @@ -12,8 +12,21 @@ import { repeat } from 'lit/directives/repeat.js'; import { AIProvider } from '../provider'; import type { DocDisplayConfig, DocSearchMenuConfig } from './chat-config'; -import type { BaseChip, ChatChip, ChatContextValue } from './chat-context'; -import { getChipKey, isDocChip, isFileChip } from './components/utils'; +import type { + ChatChip, + ChatContextValue, + DocChip, + FileChip, +} from './chat-context'; +import { + estimateTokenCount, + getChipKey, + isDocChip, + isFileChip, +} from './components/utils'; + +// 100k tokens limit for the docs context +const MAX_TOKEN_COUNT = 100000; export class ChatPanelChips extends WithDisposable(ShadowlessElement) { static override styles = css` @@ -91,6 +104,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) { .addChip=${this._addChip} .updateChip=${this._updateChip} .removeChip=${this._removeChip} + .checkTokenLimit=${this._checkTokenLimit} .docDisplayConfig=${this.docDisplayConfig} .host=${this.host} >`; @@ -189,7 +203,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) { private readonly _updateChip = ( chip: ChatChip, - options: Partial + options: Partial ) => { const index = this.chatContextValue.chips.findIndex(item => { if (isDocChip(chip)) { @@ -264,4 +278,25 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) { }); } }; + + private readonly _checkTokenLimit = ( + newChip: DocChip, + newTokenCount: number + ) => { + const estimatedTokens = this.chatContextValue.chips.reduce((acc, chip) => { + if (isFileChip(chip)) { + return acc; + } + if (chip.docId === newChip.docId) { + return acc + newTokenCount; + } + if (chip.markdown?.value && chip.state === 'success') { + const tokenCount = + chip.tokenCount ?? estimateTokenCount(chip.markdown.value); + return acc + tokenCount; + } + return acc; + }, 0); + return estimatedTokens <= MAX_TOKEN_COUNT; + }; } diff --git a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/doc-chip.ts b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/doc-chip.ts index c9f54befee..d9c54a48ac 100644 --- a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/doc-chip.ts +++ b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/doc-chip.ts @@ -13,8 +13,8 @@ import { property } from 'lit/decorators.js'; import { extractMarkdownFromDoc } from '../../utils/extract'; import type { DocDisplayConfig } from '../chat-config'; -import type { BaseChip, ChatChip, DocChip } from '../chat-context'; -import { getChipIcon, getChipTooltip } from './utils'; +import type { ChatChip, DocChip } from '../chat-context'; +import { estimateTokenCount, getChipIcon, getChipTooltip } from './utils'; const EXTRACT_DOC_THROTTLE = 1000; @@ -28,11 +28,17 @@ export class ChatPanelDocChip extends SignalWatcher( accessor addChip!: (chip: ChatChip) => void; @property({ attribute: false }) - accessor updateChip!: (chip: ChatChip, options: Partial) => void; + accessor updateChip!: (chip: ChatChip, options: Partial) => void; @property({ attribute: false }) accessor removeChip!: (chip: ChatChip) => void; + @property({ attribute: false }) + accessor checkTokenLimit!: ( + newChip: DocChip, + newTokenCount: number + ) => boolean; + @property({ attribute: false }) accessor docDisplayConfig!: DocDisplayConfig; @@ -103,15 +109,22 @@ export class ChatPanelDocChip extends SignalWatcher( if (!doc.ready) { doc.load(); } - const result = await extractMarkdownFromDoc(doc, this.host.std.provider); - if (this.chip.markdown) { - this.chip.markdown.value = result.markdown; + const value = await extractMarkdownFromDoc(doc, this.host.std.provider); + const tokenCount = estimateTokenCount(value); + if (this.checkTokenLimit(this.chip, tokenCount)) { + const markdown = this.chip.markdown ?? new Signal(''); + markdown.value = value; + this.updateChip(this.chip, { + state: 'success', + markdown, + tokenCount, + }); } else { - this.chip.markdown = new Signal(result.markdown); + this.updateChip(this.chip, { + state: 'failed', + tooltip: 'Content exceeds token limit', + }); } - this.updateChip(this.chip, { - state: 'success', - }); } catch (e) { this.updateChip(this.chip, { state: 'failed', diff --git a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/utils.ts b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/utils.ts index 10f11a2fed..72a2b85b88 100644 --- a/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/utils.ts +++ b/packages/frontend/core/src/blocksuite/presets/ai/chat-panel/components/utils.ts @@ -69,3 +69,14 @@ export function getChipKey(chip: ChatChip) { } return null; } + +export function estimateTokenCount(text: string): number { + const chinese = text.match(/[\u4e00-\u9fa5]/g)?.length || 0; + const english = text.replace(/[\u4e00-\u9fa5]/g, ''); + // Split English text into words by whitespace + const englishWords = english.trim().split(/\s+/).length; + + // Chinese characters: 1 character ≈ 2.5 tokens + // English words: 1 word ≈ 1.3 tokens + return Math.ceil(chinese * 2.5 + englishWords * 1.3); +} diff --git a/packages/frontend/core/src/blocksuite/presets/ai/utils/extract.ts b/packages/frontend/core/src/blocksuite/presets/ai/utils/extract.ts index 12b917ffc6..6129b105ad 100644 --- a/packages/frontend/core/src/blocksuite/presets/ai/utils/extract.ts +++ b/packages/frontend/core/src/blocksuite/presets/ai/utils/extract.ts @@ -158,7 +158,7 @@ export async function extractPageAll( export async function extractMarkdownFromDoc( doc: Store, provider: ServiceProvider -): Promise<{ docId: string; markdown: string }> { +): Promise { const transformer = await getTransformer(doc); const adapter = new MarkdownAdapter(transformer, provider); const blockModels = getNoteBlockModels(doc); @@ -177,10 +177,7 @@ export async function extractMarkdownFromDoc( snapshot, assets: transformer.assetsManager, }); - return { - docId: doc.id, - markdown: content.file, - }; + return content.file; } function getNoteBlockModels(doc: Store) {