feat(core): add 100k tokens limit for the docs context (#10211)

Support issue [BS-2352](https://linear.app/affine-design/issue/BS-2352).

![截屏2025-02-16 21.32.16.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/sJGviKxfE3Ap685cl5bj/813b2f67-918b-4c61-ba2e-b5066792a4fd.png)
This commit is contained in:
akumatus
2025-02-18 02:33:49 +00:00
parent 015452e8fb
commit b2654ffec7
5 changed files with 75 additions and 18 deletions

View File

@@ -78,6 +78,7 @@ export interface BaseChip {
/**
 * A chat context chip that refers to a document.
 */
export interface DocChip extends BaseChip {
  /** Identifier of the document this chip refers to. */
docId: string;
  /** Signal holding the markdown text extracted from the document, once available. */
markdown?: Signal<string>;
  /** Estimated token count of the extracted markdown, stored alongside it on success. */
tokenCount?: number;
}
export interface FileChip extends BaseChip {

View File

@@ -12,8 +12,21 @@ import { repeat } from 'lit/directives/repeat.js';
import { AIProvider } from '../provider';
import type { DocDisplayConfig, DocSearchMenuConfig } from './chat-config';
import type { BaseChip, ChatChip, ChatContextValue } from './chat-context';
import { getChipKey, isDocChip, isFileChip } from './components/utils';
import type {
ChatChip,
ChatContextValue,
DocChip,
FileChip,
} from './chat-context';
import {
estimateTokenCount,
getChipKey,
isDocChip,
isFileChip,
} from './components/utils';
/**
 * Upper bound (100k) on the combined estimated token count of the doc chips
 * in the chat context. The sum is compared against this value when a doc's
 * content is about to be added or refreshed.
 */
const MAX_TOKEN_COUNT = 100000;
export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
static override styles = css`
@@ -91,6 +104,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
.addChip=${this._addChip}
.updateChip=${this._updateChip}
.removeChip=${this._removeChip}
.checkTokenLimit=${this._checkTokenLimit}
.docDisplayConfig=${this.docDisplayConfig}
.host=${this.host}
></chat-panel-doc-chip>`;
@@ -189,7 +203,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
private readonly _updateChip = (
chip: ChatChip,
options: Partial<BaseChip>
options: Partial<DocChip | FileChip>
) => {
const index = this.chatContextValue.chips.findIndex(item => {
if (isDocChip(chip)) {
@@ -264,4 +278,25 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
});
}
};
/**
 * Reports whether the doc chips stay within `MAX_TOKEN_COUNT` when the chip
 * matching `newChip.docId` is counted as `newTokenCount` tokens.
 *
 * File chips are ignored. Every other doc chip contributes only when it is in
 * the `success` state and has non-empty markdown, using its cached
 * `tokenCount` when present and a fresh estimate otherwise.
 *
 * @param newChip - The doc chip whose content is being (re)evaluated.
 * @param newTokenCount - Estimated token count of that chip's new content.
 * @returns `true` when the total is at or below the limit.
 */
private readonly _checkTokenLimit = (
  newChip: DocChip,
  newTokenCount: number
) => {
  let total = 0;
  for (const chip of this.chatContextValue.chips) {
    if (isFileChip(chip)) {
      continue;
    }
    if (chip.docId === newChip.docId) {
      // Use the incoming count for the chip under evaluation, not its stale one.
      total += newTokenCount;
      continue;
    }
    const content = chip.markdown?.value;
    if (!content || chip.state !== 'success') {
      continue;
    }
    total += chip.tokenCount ?? estimateTokenCount(content);
  }
  return total <= MAX_TOKEN_COUNT;
};
}

View File

@@ -13,8 +13,8 @@ import { property } from 'lit/decorators.js';
import { extractMarkdownFromDoc } from '../../utils/extract';
import type { DocDisplayConfig } from '../chat-config';
import type { BaseChip, ChatChip, DocChip } from '../chat-context';
import { getChipIcon, getChipTooltip } from './utils';
import type { ChatChip, DocChip } from '../chat-context';
import { estimateTokenCount, getChipIcon, getChipTooltip } from './utils';
const EXTRACT_DOC_THROTTLE = 1000;
@@ -28,11 +28,17 @@ export class ChatPanelDocChip extends SignalWatcher(
accessor addChip!: (chip: ChatChip) => void;
@property({ attribute: false })
accessor updateChip!: (chip: ChatChip, options: Partial<BaseChip>) => void;
accessor updateChip!: (chip: ChatChip, options: Partial<DocChip>) => void;
@property({ attribute: false })
accessor removeChip!: (chip: ChatChip) => void;
@property({ attribute: false })
accessor checkTokenLimit!: (
newChip: DocChip,
newTokenCount: number
) => boolean;
@property({ attribute: false })
accessor docDisplayConfig!: DocDisplayConfig;
@@ -103,15 +109,22 @@ export class ChatPanelDocChip extends SignalWatcher(
if (!doc.ready) {
doc.load();
}
const result = await extractMarkdownFromDoc(doc, this.host.std.provider);
if (this.chip.markdown) {
this.chip.markdown.value = result.markdown;
const value = await extractMarkdownFromDoc(doc, this.host.std.provider);
const tokenCount = estimateTokenCount(value);
if (this.checkTokenLimit(this.chip, tokenCount)) {
const markdown = this.chip.markdown ?? new Signal<string>('');
markdown.value = value;
this.updateChip(this.chip, {
state: 'success',
markdown,
tokenCount,
});
} else {
this.chip.markdown = new Signal<string>(result.markdown);
this.updateChip(this.chip, {
state: 'failed',
tooltip: 'Content exceeds token limit',
});
}
this.updateChip(this.chip, {
state: 'success',
});
} catch (e) {
this.updateChip(this.chip, {
state: 'failed',

View File

@@ -69,3 +69,14 @@ export function getChipKey(chip: ChatChip) {
}
return null;
}
/**
 * Produces a rough token estimate for a piece of text.
 *
 * Heuristic:
 * - each Chinese character (U+4E00–U+9FA5) counts as about 2.5 tokens;
 * - each whitespace-separated word in the remaining text counts as about 1.3 tokens.
 *
 * Empty, whitespace-only, or purely Chinese text contributes no words, so an
 * empty string yields 0.
 *
 * @param text - The text to estimate.
 * @returns The estimated token count, rounded up to an integer.
 */
export function estimateTokenCount(text: string): number {
  const chinesePattern = /[\u4e00-\u9fa5]/g;
  const chineseCount = text.match(chinesePattern)?.length ?? 0;

  // Strip Chinese characters; what is left is counted word by word.
  const remainder = text.replace(chinesePattern, '').trim();
  // Guard the empty case: ''.split(/\s+/) returns [''], which would count as one word.
  const wordCount = remainder === '' ? 0 : remainder.split(/\s+/).length;

  return Math.ceil(chineseCount * 2.5 + wordCount * 1.3);
}

View File

@@ -158,7 +158,7 @@ export async function extractPageAll(
export async function extractMarkdownFromDoc(
doc: Store,
provider: ServiceProvider
): Promise<{ docId: string; markdown: string }> {
): Promise<string> {
const transformer = await getTransformer(doc);
const adapter = new MarkdownAdapter(transformer, provider);
const blockModels = getNoteBlockModels(doc);
@@ -177,10 +177,7 @@ export async function extractMarkdownFromDoc(
snapshot,
assets: transformer.assetsManager,
});
return {
docId: doc.id,
markdown: content.file,
};
return content.file;
}
function getNoteBlockModels(doc: Store) {