mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 21:05:19 +00:00
feat(server): use native tokenizer impl (#6960)
### Benchmark `yarn workspace @affine/server-native bench` ``` ┌─────────┬────────────┬─────────┬────────────────────┬──────────┬─────────┐ │ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │ ├─────────┼────────────┼─────────┼────────────────────┼──────────┼─────────┤ │ 0 │ 'tiktoken' │ '5' │ 176932518.76000002 │ '±4.71%' │ 100 │ │ 1 │ 'native' │ '16' │ 61041597.51000003 │ '±0.60%' │ 100 │ └─────────┴────────────┴─────────┴────────────────────┴──────────┴─────────┘ ```
This commit is contained in:
@@ -2,12 +2,17 @@
|
||||
|
||||
pub mod file_type;
|
||||
pub mod hashcash;
|
||||
pub mod tiktoken;
|
||||
|
||||
use std::fmt::{Debug, Display};
|
||||
|
||||
use napi::{bindgen_prelude::*, Error, Result, Status};
|
||||
use y_octo::Doc;
|
||||
|
||||
#[cfg(not(target_arch = "arm"))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
#[macro_use]
|
||||
extern crate napi_derive;
|
||||
|
||||
|
||||
30
packages/backend/native/src/tiktoken.rs
Normal file
30
packages/backend/native/src/tiktoken.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
#[napi]
|
||||
pub struct Tokenizer {
|
||||
inner: tiktoken_rs::CoreBPE,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn from_model_name(model_name: String) -> Option<Tokenizer> {
|
||||
let bpe = tiktoken_rs::get_bpe_from_model(&model_name).ok()?;
|
||||
Some(Tokenizer { inner: bpe })
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Tokenizer {
|
||||
#[napi]
|
||||
pub fn count(&self, content: String, allowed_special: Option<Vec<String>>) -> u32 {
|
||||
self
|
||||
.inner
|
||||
.encode(
|
||||
&content,
|
||||
if let Some(allowed_special) = &allowed_special {
|
||||
HashSet::from_iter(allowed_special.iter().map(|s| s.as_str()))
|
||||
} else {
|
||||
Default::default()
|
||||
},
|
||||
)
|
||||
.len() as u32
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user