diff --git a/Cargo.lock b/Cargo.lock index c9cb28484c..971a8646cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -453,12 +453,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -1263,7 +1257,7 @@ name = "docx-parser" version = "0.1.1" source = "git+https://github.com/toeverything/docx-parser#278ba3eeb29bbf1ee7958b02436e4402af61859b" dependencies = [ - "base64 0.22.1", + "base64", "clap", "docx-rust", "serde", @@ -1826,7 +1820,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.57.0", + "windows-core 0.61.2", ] [[package]] @@ -1838,18 +1832,6 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke 0.7.5", - "zerofrom", - "zerovec 0.10.4", -] - [[package]] name = "icu_collections" version = "2.0.0" @@ -1858,9 +1840,25 @@ checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", "potential_utf", - "yoke 0.8.0", + "yoke", "zerofrom", - "zerovec 0.11.2", + "zerovec", +] + +[[package]] +name = "icu_locale" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", ] [[package]] @@ -1870,23 +1868,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", - "litemap 0.8.0", - "tinystr 0.8.1", - "writeable 0.6.1", - "zerovec 0.11.2", + "litemap", + "tinystr", + "writeable", + "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_data" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap 0.7.5", - "tinystr 0.7.6", - "writeable 0.5.5", -] +checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765" [[package]] name = "icu_normalizer" @@ -1895,12 +1887,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", - "icu_collections 2.0.0", + "icu_collections", "icu_normalizer_data", "icu_properties", - "icu_provider 2.0.0", + "icu_provider", "smallvec", - "zerovec 0.11.2", + "zerovec", ] [[package]] @@ -1916,13 +1908,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", - "icu_collections 2.0.0", + "icu_collections", "icu_locale_core", "icu_properties_data", - "icu_provider 2.0.0", + "icu_provider", "potential_utf", "zerotrie", - "zerovec 0.11.2", + "zerovec", ] [[package]] @@ -1931,23 +1923,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr 0.7.6", - "writeable 0.5.5", - "yoke 0.7.5", - "zerofrom", - "zerovec 0.10.4", -] - [[package]] name = "icu_provider" version = "2.0.0" @@ -1957,46 +1932,37 @@ dependencies = [ "displaydoc", "icu_locale_core", "stable_deref_trait", - "tinystr 0.8.1", - "writeable 0.6.1", - "yoke 0.8.0", + "tinystr", + "writeable", + "yoke", "zerofrom", "zerotrie", - "zerovec 0.11.2", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "zerovec", ] [[package]] name = "icu_segmenter" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a717725612346ffc2d7b42c94b820db6908048f39434504cb130e8b46256b0de" +checksum = "e185fc13b6401c138cf40db12b863b35f5edf31b88192a545857b41aeaf7d3d3" dependencies = [ "core_maths", "displaydoc", - "icu_collections 1.5.0", - "icu_locid", - "icu_provider 1.5.0", + "icu_collections", + "icu_locale", + "icu_locale_core", + "icu_provider", "icu_segmenter_data", + "potential_utf", "utf8_iter", - "zerovec 0.10.4", + "zerovec", ] [[package]] name = "icu_segmenter_data" -version = "1.5.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e52775179941363cc594e49ce99284d13d6948928d8e72c755f55e98caa1eb" +checksum = "5360a2fbe97f617c4f8b944356dedb36d423f7da7f13c070995cf89e59f01220" [[package]] name = "idna" @@ -2193,7 +2159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -2229,12 +2195,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" -[[package]] -name = "litemap" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" - [[package]] name = "litemap" version = "0.8.0" @@ -2963,7 +2923,8 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" dependencies = [ - "zerovec 0.11.2", + "serde", + "zerovec", ] [[package]] @@ -3770,7 +3731,7 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "chrono", "crc", @@ -3846,7 +3807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags 2.9.1", "byteorder", "bytes", @@ -3889,7 +3850,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", - "base64 0.22.1", + "base64", "bitflags 2.9.1", "byteorder", "chrono", @@ -4300,14 +4261,14 @@ dependencies = [ [[package]] name = "text-splitter" -version = "0.25.1" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8130aecc3b7938ce3ea387d7615eca92bd4f702a5adc0548ba930a9c039dda4" +checksum = "9f7e97f5863248f7d07896a1816bd4110cb1b0b122741f157d702121a270bf33" dependencies = [ "ahash", "auto_enums", "either", - "icu_provider 1.5.0", + "icu_provider", "icu_segmenter", "itertools 0.14.0", "memchr", @@ -4378,29 +4339,19 @@ dependencies = [ [[package]] name = "tiktoken-rs" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6" +checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625" dependencies = [ "anyhow", - "base64 0.21.7", + "base64", "bstr", "fancy-regex", "lazy_static", - "parking_lot", "regex", "rustc-hash 1.1.0", ] -[[package]] -name = "tinystr" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" -dependencies = [ - "displaydoc", -] - [[package]] name = "tinystr" version = "0.8.1" @@ -4408,7 +4359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", - "zerovec 0.11.2", + "zerovec", ] [[package]] @@ -4590,9 +4541,9 @@ dependencies = [ [[package]] name = "tree-sitter-c" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" dependencies = [ "cc", "tree-sitter-language", @@ -5165,7 +5116,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -5504,12 +5455,6 @@ dependencies = [ "bitflags 2.9.1", ] -[[package]] -name = "writeable" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" - [[package]] name = "writeable" version = "0.6.1" @@ -5614,18 +5559,6 @@ dependencies = [ "yrs 0.17.4", ] -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive 0.7.5", - "zerofrom", -] - [[package]] name = "yoke" version = "0.8.0" @@ -5634,22 +5567,10 @@ checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", - "yoke-derive 0.8.0", + "yoke-derive", "zerofrom", ] -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", - "synstructure", -] - [[package]] name = "yoke-derive" version = "0.8.0" @@ -5749,41 +5670,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" dependencies = [ "displaydoc", - "yoke 0.8.0", + "yoke", "zerofrom", ] -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke 0.7.5", - "zerofrom", - "zerovec-derive 0.10.3", -] - [[package]] name = "zerovec" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ - "yoke 0.8.0", + "yoke", "zerofrom", - "zerovec-derive 0.11.1", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "zerovec-derive", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f5e0cd8b5c..6c2495aa3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,12 +77,12 @@ smol_str = "0.3" sqlx = { version = "0.8", default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] } strum_macros = "0.27.0" symphonia = { version = "0.5", features = ["all", "opt-simd"] } -text-splitter = "0.25" +text-splitter = "0.27" thiserror = "2" -tiktoken-rs = "0.6" +tiktoken-rs = "0.7" tokio = "1.45" tree-sitter = { version = "0.25" } -tree-sitter-c = { version = "0.23" } +tree-sitter-c = { version = "0.24" } tree-sitter-c-sharp = { version = "0.23" } tree-sitter-cpp = { version = "0.23" } tree-sitter-go = { version = "0.23" } diff --git a/packages/backend/native/src/tiktoken.rs b/packages/backend/native/src/tiktoken.rs index 7c76219b71..919e952940 100644 --- a/packages/backend/native/src/tiktoken.rs +++ b/packages/backend/native/src/tiktoken.rs @@ -15,16 +15,25 @@ pub fn from_model_name(model_name: String) -> Option { impl Tokenizer { #[napi] pub fn count(&self, content: String, allowed_special: Option>) -> u32 { - self - .inner - .encode( - &content, - if let Some(allowed_special) = &allowed_special { - HashSet::from_iter(allowed_special.iter().map(|s| s.as_str())) - } else { - Default::default() - }, - ) - .len() as u32 + let allowed_special = if let Some(allowed_special) = &allowed_special { + HashSet::from_iter(allowed_special.iter().map(|s| s.as_str())) + } else { + Default::default() + }; + + self.inner.encode(&content, &allowed_special).0.len() as u32 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tokenizer() { + let tokenizer = from_model_name("gpt-4.1".to_string()).unwrap(); + let content = "Hello, world!"; + let count = tokenizer.count(content.to_string(), None); + assert!(count > 0); } } diff --git a/packages/backend/server/src/plugins/copilot/session.ts b/packages/backend/server/src/plugins/copilot/session.ts index 3693dbc7ee..56f4140a07 100644 --- a/packages/backend/server/src/plugins/copilot/session.ts +++ b/packages/backend/server/src/plugins/copilot/session.ts @@ -19,7 +19,6 @@ import { ChatMessageCache } from './message'; import { PromptService } from './prompt'; import { PromptMessage, PromptParams } from './providers'; import { - AvailableModel, ChatHistory, ChatMessage, ChatMessageSchema, @@ -38,7 +37,7 @@ export class ChatSession implements AsyncDisposable { private readonly messageCache: ChatMessageCache, private readonly state: ChatSessionState, private readonly dispose?: (state: ChatSessionState) => Promise, - private readonly maxTokenSize = 3840 + private readonly maxTokenSize = state.prompt.config?.maxTokens || 128 * 1024 ) {} get model() { @@ -297,8 +296,8 @@ export class ChatSessionService { messageCost: { increment: userMessages.length }, tokenCost: { increment: this.calculateTokenSize( - userMessages, - state.prompt.model as AvailableModel + state.messages, + state.prompt.model ), }, }, @@ -402,10 +401,7 @@ export class ChatSessionService { }); } - private calculateTokenSize( - messages: PromptMessage[], - model: AvailableModel - ): number { + private calculateTokenSize(messages: PromptMessage[], model: string): number { const encoder = getTokenEncoder(model); return messages .map(m => encoder?.count(m.content) ?? 0) diff --git a/packages/backend/server/src/plugins/copilot/types.ts b/packages/backend/server/src/plugins/copilot/types.ts index 9aefd70f55..07fb796b89 100644 --- a/packages/backend/server/src/plugins/copilot/types.ts +++ b/packages/backend/server/src/plugins/copilot/types.ts @@ -47,26 +47,24 @@ export enum AvailableModels { Gpt41 = 'gpt-4.1', Gpt410414 = 'gpt-4.1-2025-04-14', Gpt41Mini = 'gpt-4.1-mini', + Gpt41Nano = 'gpt-4.1-nano', // embeddings TextEmbedding3Large = 'text-embedding-3-large', TextEmbedding3Small = 'text-embedding-3-small', TextEmbeddingAda002 = 'text-embedding-ada-002', - // moderation - TextModerationLatest = 'text-moderation-latest', - TextModerationStable = 'text-moderation-stable', // text to image DallE3 = 'dall-e-3', + GptImage = 'gpt-image-1', } -export type AvailableModel = keyof typeof AvailableModels; +const availableModels = Object.values(AvailableModels); export function getTokenEncoder(model?: string | null): Tokenizer | null { if (!model) return null; - const modelStr = AvailableModels[model as AvailableModel]; - if (!modelStr) return null; - if (modelStr.startsWith('gpt')) { - return fromModelName(modelStr); - } else if (modelStr.startsWith('dall')) { + if (!availableModels.includes(model as AvailableModels)) return null; + if (model.startsWith('gpt')) { + return fromModelName(model); + } else if (model.startsWith('dall')) { // dalle don't need to calc the token return null; } else { diff --git a/packages/backend/server/src/schema.gql b/packages/backend/server/src/schema.gql index f9b8da63a4..cc26a936a4 100644 --- a/packages/backend/server/src/schema.gql +++ b/packages/backend/server/src/schema.gql @@ -262,12 +262,12 @@ enum CopilotModels { Gpt4OmniMini0718 Gpt41 Gpt41Mini + Gpt41Nano Gpt410414 + GptImage TextEmbedding3Large TextEmbedding3Small TextEmbeddingAda002 - TextModerationLatest - TextModerationStable } input CopilotPromptConfigInput { diff --git a/packages/common/graphql/src/schema.ts b/packages/common/graphql/src/schema.ts index ccebb1b72d..7f891fb1eb 100644 --- a/packages/common/graphql/src/schema.ts +++ b/packages/common/graphql/src/schema.ts @@ -356,12 +356,12 @@ export enum CopilotModels { Gpt4OmniMini0718 = 'Gpt4OmniMini0718', Gpt41 = 'Gpt41', Gpt41Mini = 'Gpt41Mini', + Gpt41Nano = 'Gpt41Nano', Gpt410414 = 'Gpt410414', + GptImage = 'GptImage', TextEmbedding3Large = 'TextEmbedding3Large', TextEmbedding3Small = 'TextEmbedding3Small', TextEmbeddingAda002 = 'TextEmbeddingAda002', - TextModerationLatest = 'TextModerationLatest', - TextModerationStable = 'TextModerationStable', } export interface CopilotPromptConfigInput {