fix(server): token calculate (#12667)

This commit is contained in:
darkskygit
2025-06-04 07:09:33 +00:00
parent f54bc0c047
commit 13fa4f922a
7 changed files with 105 additions and 203 deletions

View File

@@ -15,16 +15,25 @@ pub fn from_model_name(model_name: String) -> Option<Tokenizer> {
// Inherent methods of `Tokenizer`.
// NOTE(review): this listing looks like a diff hunk rendered without +/- markers,
// so the body of `count` below shows two implementations back to back — presumably
// the removed one followed by its replacement. Confirm against the actual file.
impl Tokenizer {
// Returns how many tokens `self.inner.encode` produces for `content`, as a `u32`.
//
// `allowed_special`: optional list of strings handed to the encoder as a set of
// `&str`; when it is `None`, `Default::default()` is passed instead (presumably an
// empty set — confirm against the encoder's signature).
//
// The `as u32` cast does not check for overflow: a length above `u32::MAX` would
// be truncated silently.
#[napi]
pub fn count(&self, content: String, allowed_special: Option<Vec<String>>) -> u32 {
// First form (presumably the pre-change code): the set is built inline, passed to
// `encode` by value, and `.len()` is called directly on the value `encode` returns.
self
.inner
.encode(
&content,
if let Some(allowed_special) = &allowed_special {
HashSet::from_iter(allowed_special.iter().map(|s| s.as_str()))
} else {
Default::default()
},
)
.len() as u32
// Second form (presumably the post-change code): the set is bound to a local first
// so that it can be passed to `encode` by reference.
let allowed_special = if let Some(allowed_special) = &allowed_special {
HashSet::from_iter(allowed_special.iter().map(|s| s.as_str()))
} else {
Default::default()
};
// Here `encode` is treated as returning a tuple: `.0` is the value whose length is
// counted (presumably the token list — TODO confirm what the other element holds).
self.inner.encode(&content, &allowed_special).0.len() as u32
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: a tokenizer looked up by model name must report a non-zero
    // token count for a short, non-empty input.
    #[test]
    fn test_tokenizer() {
        let model = String::from("gpt-4.1");
        let tokenizer = from_model_name(model).unwrap();

        // No special tokens are allowed (`None`).
        let count = tokenizer.count(String::from("Hello, world!"), None);
        assert!(count > 0);
    }
}