mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 21:27:20 +00:00
feat(core): add 100k tokens limit for the docs context (#10211)
Support issue [BS-2352](https://linear.app/affine-design/issue/BS-2352). 
This commit is contained in:
@@ -78,6 +78,7 @@ export interface BaseChip {
|
||||
/**
 * A chat-context chip that refers to a document.
 */
export interface DocChip extends BaseChip {
|
||||
/** Identifier of the document this chip points at. */
docId: string;
|
||||
/** Signal holding the markdown text extracted from the document, once available. */
markdown?: Signal<string>;
|
||||
/** Estimated token count of the extracted markdown, stored alongside it on success. */
tokenCount?: number;
|
||||
}
|
||||
|
||||
export interface FileChip extends BaseChip {
|
||||
|
||||
@@ -12,8 +12,21 @@ import { repeat } from 'lit/directives/repeat.js';
|
||||
|
||||
import { AIProvider } from '../provider';
|
||||
import type { DocDisplayConfig, DocSearchMenuConfig } from './chat-config';
|
||||
import type { BaseChip, ChatChip, ChatContextValue } from './chat-context';
|
||||
import { getChipKey, isDocChip, isFileChip } from './components/utils';
|
||||
import type {
|
||||
ChatChip,
|
||||
ChatContextValue,
|
||||
DocChip,
|
||||
FileChip,
|
||||
} from './chat-context';
|
||||
import {
|
||||
estimateTokenCount,
|
||||
getChipKey,
|
||||
isDocChip,
|
||||
isFileChip,
|
||||
} from './components/utils';
|
||||
|
||||
// Upper bound (100k) on the estimated token total of the docs context; the
// token-limit check accepts a total only while it is <= this value.
|
||||
const MAX_TOKEN_COUNT = 100000;
|
||||
|
||||
export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
|
||||
static override styles = css`
|
||||
@@ -91,6 +104,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
|
||||
.addChip=${this._addChip}
|
||||
.updateChip=${this._updateChip}
|
||||
.removeChip=${this._removeChip}
|
||||
.checkTokenLimit=${this._checkTokenLimit}
|
||||
.docDisplayConfig=${this.docDisplayConfig}
|
||||
.host=${this.host}
|
||||
></chat-panel-doc-chip>`;
|
||||
@@ -189,7 +203,7 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
|
||||
|
||||
private readonly _updateChip = (
|
||||
chip: ChatChip,
|
||||
options: Partial<BaseChip>
|
||||
options: Partial<DocChip | FileChip>
|
||||
) => {
|
||||
const index = this.chatContextValue.chips.findIndex(item => {
|
||||
if (isDocChip(chip)) {
|
||||
@@ -264,4 +278,25 @@ export class ChatPanelChips extends WithDisposable(ShadowlessElement) {
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Decides whether the docs context stays within `MAX_TOKEN_COUNT` when
 * `newChip` is counted with `newTokenCount` estimated tokens.
 *
 * File chips never contribute. A chip whose `docId` equals `newChip.docId`
 * contributes `newTokenCount` instead of its own value. Any other doc chip
 * contributes only when it is in the `success` state and has non-empty
 * markdown, using its stored `tokenCount` or, failing that, a fresh estimate.
 *
 * @param newChip - The doc chip whose content is about to be accepted.
 * @param newTokenCount - Estimated token count of that chip's new content.
 * @returns `true` when the accumulated estimate does not exceed the limit.
 */
private readonly _checkTokenLimit = (
  newChip: DocChip,
  newTokenCount: number
) => {
  let total = 0;
  for (const chip of this.chatContextValue.chips) {
    if (isFileChip(chip)) {
      continue;
    }
    if (chip.docId === newChip.docId) {
      // Use the incoming estimate rather than whatever the chip holds now.
      total += newTokenCount;
    } else if (chip.markdown?.value && chip.state === 'success') {
      total += chip.tokenCount ?? estimateTokenCount(chip.markdown.value);
    }
  }
  return total <= MAX_TOKEN_COUNT;
};
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@ import { property } from 'lit/decorators.js';
|
||||
|
||||
import { extractMarkdownFromDoc } from '../../utils/extract';
|
||||
import type { DocDisplayConfig } from '../chat-config';
|
||||
import type { BaseChip, ChatChip, DocChip } from '../chat-context';
|
||||
import { getChipIcon, getChipTooltip } from './utils';
|
||||
import type { ChatChip, DocChip } from '../chat-context';
|
||||
import { estimateTokenCount, getChipIcon, getChipTooltip } from './utils';
|
||||
|
||||
const EXTRACT_DOC_THROTTLE = 1000;
|
||||
|
||||
@@ -28,11 +28,17 @@ export class ChatPanelDocChip extends SignalWatcher(
|
||||
accessor addChip!: (chip: ChatChip) => void;
|
||||
|
||||
@property({ attribute: false })
|
||||
accessor updateChip!: (chip: ChatChip, options: Partial<BaseChip>) => void;
|
||||
accessor updateChip!: (chip: ChatChip, options: Partial<DocChip>) => void;
|
||||
|
||||
@property({ attribute: false })
|
||||
accessor removeChip!: (chip: ChatChip) => void;
|
||||
|
||||
/**
 * Callback provided by the parent chips panel. Given this chip and the
 * estimated token count of its freshly extracted markdown, it returns
 * whether the content may be accepted; on `false` the chip is marked as
 * failed with a "Content exceeds token limit" tooltip.
 */
@property({ attribute: false })
|
||||
accessor checkTokenLimit!: (
|
||||
newChip: DocChip,
|
||||
newTokenCount: number
|
||||
) => boolean;
|
||||
|
||||
@property({ attribute: false })
|
||||
accessor docDisplayConfig!: DocDisplayConfig;
|
||||
|
||||
@@ -103,15 +109,22 @@ export class ChatPanelDocChip extends SignalWatcher(
|
||||
if (!doc.ready) {
|
||||
doc.load();
|
||||
}
|
||||
const result = await extractMarkdownFromDoc(doc, this.host.std.provider);
|
||||
if (this.chip.markdown) {
|
||||
this.chip.markdown.value = result.markdown;
|
||||
const value = await extractMarkdownFromDoc(doc, this.host.std.provider);
|
||||
const tokenCount = estimateTokenCount(value);
|
||||
if (this.checkTokenLimit(this.chip, tokenCount)) {
|
||||
const markdown = this.chip.markdown ?? new Signal<string>('');
|
||||
markdown.value = value;
|
||||
this.updateChip(this.chip, {
|
||||
state: 'success',
|
||||
markdown,
|
||||
tokenCount,
|
||||
});
|
||||
} else {
|
||||
this.chip.markdown = new Signal<string>(result.markdown);
|
||||
this.updateChip(this.chip, {
|
||||
state: 'failed',
|
||||
tooltip: 'Content exceeds token limit',
|
||||
});
|
||||
}
|
||||
this.updateChip(this.chip, {
|
||||
state: 'success',
|
||||
});
|
||||
} catch (e) {
|
||||
this.updateChip(this.chip, {
|
||||
state: 'failed',
|
||||
|
||||
@@ -69,3 +69,14 @@ export function getChipKey(chip: ChatChip) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Roughly estimates how many LLM tokens a piece of text will consume.
 *
 * Characters in the range U+4E00–U+9FA5 are counted individually as Chinese
 * characters; everything else is split on whitespace and counted as words.
 *
 * Heuristic weights:
 * - Chinese characters: 1 character ≈ 2.5 tokens
 * - Other (English) words: 1 word ≈ 1.3 tokens
 *
 * @param text - The text to measure.
 * @returns The estimated token count, rounded up; `0` for empty or
 *   whitespace-only input.
 */
export function estimateTokenCount(text: string): number {
  const chinese = text.match(/[\u4e00-\u9fa5]/g)?.length ?? 0;

  // Replace Chinese characters with a space (not the empty string) so that
  // words on either side of them are not fused into a single word.
  const english = text.replace(/[\u4e00-\u9fa5]/g, ' ').trim();

  // ''.split(/\s+/) yields [''] (length 1), so guard the empty case to avoid
  // charging a phantom word for empty, blank, or Chinese-only text.
  const englishWords = english === '' ? 0 : english.split(/\s+/).length;

  return Math.ceil(chinese * 2.5 + englishWords * 1.3);
}
|
||||
|
||||
@@ -158,7 +158,7 @@ export async function extractPageAll(
|
||||
export async function extractMarkdownFromDoc(
|
||||
doc: Store,
|
||||
provider: ServiceProvider
|
||||
): Promise<{ docId: string; markdown: string }> {
|
||||
): Promise<string> {
|
||||
const transformer = await getTransformer(doc);
|
||||
const adapter = new MarkdownAdapter(transformer, provider);
|
||||
const blockModels = getNoteBlockModels(doc);
|
||||
@@ -177,10 +177,7 @@ export async function extractMarkdownFromDoc(
|
||||
snapshot,
|
||||
assets: transformer.assetsManager,
|
||||
});
|
||||
return {
|
||||
docId: doc.id,
|
||||
markdown: content.file,
|
||||
};
|
||||
return content.file;
|
||||
}
|
||||
|
||||
function getNoteBlockModels(doc: Store) {
|
||||
|
||||
Reference in New Issue
Block a user