mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 21:05:19 +00:00
feat(native): doc loader for common native (#9941)
This commit is contained in:
37
packages/common/native/src/doc_loader/splitter/token.rs
Normal file
37
packages/common/native/src/doc_loader/splitter/token.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use text_splitter::ChunkConfig;
|
||||
|
||||
/**
|
||||
* modified from https://github.com/Abraxas-365/langchain-rust/tree/v4.6.0/src/text_splitter
|
||||
*/
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TokenSplitter {
|
||||
splitter_options: SplitterOptions,
|
||||
}
|
||||
|
||||
impl Default for TokenSplitter {
|
||||
fn default() -> Self {
|
||||
TokenSplitter::new(SplitterOptions::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenSplitter {
|
||||
pub fn new(options: SplitterOptions) -> TokenSplitter {
|
||||
TokenSplitter {
|
||||
splitter_options: options,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TextSplitter for TokenSplitter {
|
||||
fn split_text(&self, text: &str) -> Result<Vec<String>, TextSplitterError> {
|
||||
let chunk_config = ChunkConfig::try_from(&self.splitter_options)?;
|
||||
Ok(
|
||||
text_splitter::TextSplitter::new(chunk_config)
|
||||
.chunks(text)
|
||||
.map(|x| x.to_string())
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user