mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 13:25:12 +00:00
fix(server): token calculate (#12667)
This commit is contained in:
@@ -15,16 +15,25 @@ pub fn from_model_name(model_name: String) -> Option<Tokenizer> {
|
||||
impl Tokenizer {
|
||||
#[napi]
|
||||
pub fn count(&self, content: String, allowed_special: Option<Vec<String>>) -> u32 {
|
||||
self
|
||||
.inner
|
||||
.encode(
|
||||
&content,
|
||||
if let Some(allowed_special) = &allowed_special {
|
||||
HashSet::from_iter(allowed_special.iter().map(|s| s.as_str()))
|
||||
} else {
|
||||
Default::default()
|
||||
},
|
||||
)
|
||||
.len() as u32
|
||||
let allowed_special = if let Some(allowed_special) = &allowed_special {
|
||||
HashSet::from_iter(allowed_special.iter().map(|s| s.as_str()))
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
|
||||
self.inner.encode(&content, &allowed_special).0.len() as u32
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
  use super::*;

  /// Smoke test: a tokenizer built for the "gpt-4.1" model name reports a
  /// non-zero token count for a short greeting.
  #[test]
  fn test_tokenizer() {
    let tokenizer = from_model_name(String::from("gpt-4.1")).unwrap();
    let token_count = tokenizer.count(String::from("Hello, world!"), None);

    assert!(token_count > 0);
  }
}
|
||||
|
||||
Reference in New Issue
Block a user