feat(native): sync yocto codes (#14243)

#### PR Dependency Tree


* **PR #14243** 👈

This tree was auto-generated by
[Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
  * Batch management API for coordinated document mutations and change tracking.
  * New document accessors (IDs, state snapshots, change/delete set queries) and subscriber count.

* **Chores**
  * Upgraded Rust edition across packages to 2024.
  * Repository-wide formatting, stylistic cleanups and test adjustments.

* **Breaking Changes**
  * Removed the Node native bindings package and its JS/TS declarations and tests (no longer published/available).

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
DarkSky
2026-01-11 06:08:33 +08:00
committed by GitHub
parent d515d295ce
commit ca2462f987
143 changed files with 1396 additions and 4841 deletions

View File

@@ -1,6 +1,6 @@
use std::{
io::Cursor,
panic::{catch_unwind, AssertUnwindSafe},
panic::{AssertUnwindSafe, catch_unwind},
path::PathBuf,
};
@@ -22,9 +22,7 @@ pub struct DocOptions {
impl Default for DocOptions {
fn default() -> Self {
Self {
code_threshold: 1000,
}
Self { code_threshold: 1000 }
}
}
@@ -39,9 +37,7 @@ impl Doc {
}
pub fn with_options(file_path: &str, doc: &[u8], options: DocOptions) -> LoaderResult<Self> {
if let Some(kind) =
infer::get(&doc[..4096.min(doc.len())]).or(infer::get_from_path(file_path).ok().flatten())
{
if let Some(kind) = infer::get(&doc[..4096.min(doc.len())]).or(infer::get_from_path(file_path).ok().flatten()) {
if kind.extension() == "pdf" {
return Self::load_pdf(file_path, doc);
} else if kind.extension() == "docx" {
@@ -66,11 +62,10 @@ impl Doc {
}
"rs" | "c" | "cpp" | "h" | "hpp" | "js" | "ts" | "tsx" | "go" | "py" => {
let name = path.full_str().to_string();
let loader =
SourceCodeLoader::from_string(string).with_parser_option(LanguageParserOptions {
language: get_language_by_filename(&name)?,
parser_threshold: options.code_threshold,
});
let loader = SourceCodeLoader::from_string(string).with_parser_option(LanguageParserOptions {
language: get_language_by_filename(&name)?,
parser_threshold: options.code_threshold,
});
let splitter = TokenSplitter::default();
return Self::from_loader(file_path, loader, splitter);
}
@@ -89,10 +84,7 @@ impl Doc {
splitter: impl TextSplitter + 'static,
) -> Result<Doc, LoaderError> {
let name = file_path.to_string();
let chunks = catch_unwind(AssertUnwindSafe(|| {
Self::get_chunks_from_loader(loader, splitter)
}))
.map_err(|e| {
let chunks = catch_unwind(AssertUnwindSafe(|| Self::get_chunks_from_loader(loader, splitter))).map_err(|e| {
LoaderError::Other(match e.downcast::<String>() {
Ok(v) => *v,
Err(e) => match e.downcast::<&str>() {
@@ -124,8 +116,7 @@ impl Doc {
}
fn load_docx(file_path: &str, doc: &[u8]) -> LoaderResult<Self> {
let loader = DocxLoader::new(Cursor::new(doc))
.ok_or(LoaderError::Other("Failed to parse docx document".into()))?;
let loader = DocxLoader::new(Cursor::new(doc)).ok_or(LoaderError::Other("Failed to parse docx document".into()))?;
let splitter = TokenSplitter::default();
Self::from_loader(file_path, loader, splitter)
}
@@ -175,8 +166,7 @@ mod tests {
let buffer = read(fixtures.join(fixture)).unwrap();
let doc = Doc::with_options(fixture, &buffer, DocOptions { code_threshold: 0 }).unwrap();
for chunk in doc.chunks.iter() {
let output =
read_to_string(fixtures.join(format!("{}.{}.md", fixture, chunk.index))).unwrap();
let output = read_to_string(fixtures.join(format!("{}.{}.md", fixture, chunk.index))).unwrap();
assert_eq!(chunk.content, output);
}
}

View File

@@ -61,9 +61,7 @@ mod tests {
for (idx, doc) in documents.into_iter().enumerate() {
assert_eq!(
doc.page_content,
String::from_utf8_lossy(
&read(get_fixtures_path().join(format!("demo.docx.{}.md", idx))).unwrap()
)
String::from_utf8_lossy(&read(get_fixtures_path().join(format!("demo.docx.{}.md", idx))).unwrap())
);
}
}

View File

@@ -29,10 +29,8 @@ impl<R: Read> HtmlLoader<R> {
impl<R: Read + Send + Sync + 'static> Loader for HtmlLoader<R> {
fn load(mut self) -> LoaderResult<Vec<Document>> {
let cleaned_html = readability::extractor::extract(&mut self.html, &self.url)?;
let doc =
Document::new(format!("{}\n{}", cleaned_html.title, cleaned_html.text)).with_metadata(
HashMap::from([("source".to_string(), Value::from(self.url.as_str()))]),
);
let doc = Document::new(format!("{}\n{}", cleaned_html.title, cleaned_html.text))
.with_metadata(HashMap::from([("source".to_string(), Value::from(self.url.as_str()))]));
Ok(vec![doc])
}
@@ -46,10 +44,7 @@ mod tests {
fn test_html_loader() {
let input = "<p>Hello world!</p>";
let html_loader = HtmlLoader::new(
input.as_bytes(),
Url::parse("https://example.com/").unwrap(),
);
let html_loader = HtmlLoader::new(input.as_bytes(), Url::parse("https://example.com/").unwrap());
let documents = html_loader.load().unwrap();
@@ -66,16 +61,14 @@ mod tests {
#[test]
fn test_html_load_from_path() {
let buffer = include_bytes!("../../../fixtures/sample.html");
let html_loader = HtmlLoader::new(
Cursor::new(buffer),
Url::parse("https://example.com/").unwrap(),
);
let html_loader = HtmlLoader::new(Cursor::new(buffer), Url::parse("https://example.com/").unwrap());
let documents = html_loader.load().unwrap();
let expected = "Example Domain\n\n This domain is for use in illustrative examples in \
documents. You may\n use this domain in literature without prior \
coordination or asking for\n permission.\n More information...";
let expected =
"Example Domain\n\n This domain is for use in illustrative examples in documents. You may\n use \
this domain in literature without prior coordination or asking for\n permission.\n More \
information...";
assert_eq!(documents.len(), 1);
assert_eq!(

View File

@@ -23,6 +23,6 @@ pub trait Loader: Send + Sync {
pub use docx::DocxLoader;
pub use html::HtmlLoader;
pub use pdf::PdfExtractLoader;
pub use source::{get_language_by_filename, LanguageParserOptions, SourceCodeLoader};
pub use source::{LanguageParserOptions, SourceCodeLoader, get_language_by_filename};
pub use text::TextLoader;
pub use url::Url;

View File

@@ -1,4 +1,4 @@
use pdf_extract::{output_doc, output_doc_encrypted, PlainTextOutput};
use pdf_extract::{PlainTextOutput, output_doc, output_doc_encrypted};
/**
* modified from https://github.com/Abraxas-365/langchain-rust/tree/v4.6.0/src/document_loaders
@@ -72,8 +72,7 @@ mod tests {
assert_eq!(docs.len(), 1);
assert_eq!(
&docs[0].page_content[..100],
"\n\nSample PDF\nThis is a simple PDF file. Fun fun fun.\n\nLorem ipsum dolor sit amet, \
consectetuer a"
"\n\nSample PDF\nThis is a simple PDF file. Fun fun fun.\n\nLorem ipsum dolor sit amet, consectetuer a"
);
}

View File

@@ -3,7 +3,7 @@
*/
mod parser;
pub use parser::{get_language_by_filename, LanguageParser, LanguageParserOptions};
pub use parser::{LanguageParser, LanguageParserOptions, get_language_by_filename};
use super::*;

View File

@@ -59,11 +59,7 @@ pub struct LanguageParser {
impl Debug for LanguageParser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"LanguageParser {{ language: {:?} }}",
self.parser_options.language
)
write!(f, "LanguageParser {{ language: {:?} }}", self.parser_options.language)
}
}
@@ -77,10 +73,7 @@ impl Clone for LanguageParser {
}
pub fn get_language_by_filename(name: &str) -> LoaderResult<Language> {
let extension = name
.split('.')
.next_back()
.ok_or(LoaderError::UnsupportedLanguage)?;
let extension = name.split('.').next_back().ok_or(LoaderError::UnsupportedLanguage)?;
let language = match extension.to_lowercase().as_str() {
"rs" => Language::Rust,
"c" => Language::C,
@@ -133,10 +126,7 @@ impl LanguageParser {
impl LanguageParser {
pub fn parse_code(&mut self, code: &String) -> LoaderResult<Vec<Document>> {
let tree = self
.parser
.parse(code, None)
.ok_or(LoaderError::UnsupportedLanguage)?;
let tree = self.parser.parse(code, None).ok_or(LoaderError::UnsupportedLanguage)?;
if self.parser_options.parser_threshold > tree.root_node().end_position().row as u64 {
return Ok(vec![Document::new(code).with_metadata(HashMap::from([
(
@@ -152,11 +142,7 @@ impl LanguageParser {
self.extract_functions_classes(tree, code)
}
pub fn extract_functions_classes(
&self,
tree: Tree,
code: &String,
) -> LoaderResult<Vec<Document>> {
pub fn extract_functions_classes(&self, tree: Tree, code: &String) -> LoaderResult<Vec<Document>> {
let mut chunks = Vec::new();
let count = tree.root_node().child_count();

View File

@@ -10,9 +10,7 @@ pub struct TextLoader {
impl TextLoader {
pub fn new<T: Into<String>>(input: T) -> Self {
Self {
content: input.into(),
}
Self { content: input.into() }
}
}

View File

@@ -7,8 +7,8 @@ mod types;
pub use document::{Chunk, Doc};
pub use error::{LoaderError, LoaderResult};
use loader::{
get_language_by_filename, DocxLoader, HtmlLoader, LanguageParserOptions, Loader,
PdfExtractLoader, SourceCodeLoader, TextLoader, Url,
DocxLoader, HtmlLoader, LanguageParserOptions, Loader, PdfExtractLoader, SourceCodeLoader, TextLoader, Url,
get_language_by_filename,
};
use splitter::{MarkdownSplitter, TextSplitter, TextSplitterError, TokenSplitter};
use types::Document;

View File

@@ -2,7 +2,7 @@
* modified from https://github.com/Abraxas-365/langchain-rust/tree/v4.6.0/src/text_splitter
*/
use text_splitter::ChunkConfig;
use tiktoken_rs::{get_bpe_from_model, get_bpe_from_tokenizer, tokenizer::Tokenizer, CoreBPE};
use tiktoken_rs::{CoreBPE, get_bpe_from_model, get_bpe_from_tokenizer, tokenizer::Tokenizer};
use super::TextSplitterError;
@@ -79,8 +79,8 @@ impl TryFrom<&SplitterOptions> for ChunkConfig<CoreBPE> {
fn try_from(options: &SplitterOptions) -> Result<Self, Self::Error> {
let tk = if !options.encoding_name.is_empty() {
let tokenizer = SplitterOptions::get_tokenizer_from_str(&options.encoding_name)
.ok_or(TextSplitterError::TokenizerNotFound)?;
let tokenizer =
SplitterOptions::get_tokenizer_from_str(&options.encoding_name).ok_or(TextSplitterError::TokenizerNotFound)?;
get_bpe_from_tokenizer(tokenizer).map_err(|_| TextSplitterError::InvalidTokenizer)?
} else {

View File

@@ -5,16 +5,13 @@ use y_octo::{Any, DocOptions, JwstCodecError, Map, Value};
use super::{
blocksuite::{
collect_child_ids, get_block_id, get_flavour, get_list_depth, get_string, nearest_by_flavour,
DocContext,
DocContext, collect_child_ids, get_block_id, get_flavour, get_list_depth, get_string, nearest_by_flavour,
},
delta_markdown::{
delta_value_to_inline_markdown, extract_inline_references, text_to_inline_markdown,
text_to_markdown, DeltaToMdOptions,
},
value::{
any_as_string, any_truthy, build_reference_payload, params_value_to_json, value_to_string,
DeltaToMdOptions, delta_value_to_inline_markdown, extract_inline_references, text_to_inline_markdown,
text_to_markdown,
},
value::{any_as_string, any_truthy, build_reference_payload, params_value_to_json, value_to_string},
};
const SUMMARY_LIMIT: usize = 1000;
@@ -402,17 +399,10 @@ pub fn parse_doc_from_binary(doc_bin: Vec<u8>, doc_id: String) -> Result<CrawlRe
None => continue,
};
let parent_block = parent_block_id
.as_ref()
.and_then(|id| context.block_pool.get(id));
let parent_block = parent_block_id.as_ref().and_then(|id| context.block_pool.get(id));
let parent_flavour = parent_block.and_then(get_flavour);
let note_block = nearest_by_flavour(
&block_id,
NOTE_FLAVOUR,
&context.parent_lookup,
&context.block_pool,
);
let note_block = nearest_by_flavour(&block_id, NOTE_FLAVOUR, &context.parent_lookup, &context.block_pool);
let note_block_id = note_block.as_ref().and_then(get_block_id);
let display_mode = determine_display_mode(note_block.as_ref());
@@ -438,14 +428,9 @@ pub fn parse_doc_from_binary(doc_bin: Vec<u8>, doc_id: String) -> Result<CrawlRe
continue;
}
if matches!(
flavour.as_str(),
"affine:paragraph" | "affine:list" | "affine:code"
) {
if matches!(flavour.as_str(), "affine:paragraph" | "affine:list" | "affine:code") {
if let Some(text) = block.get("prop:text").and_then(|value| value.to_text()) {
let database_name = if flavour == "affine:paragraph"
&& parent_flavour.as_deref() == Some("affine:database")
{
let database_name = if flavour == "affine:paragraph" && parent_flavour.as_deref() == Some("affine:database") {
parent_block.and_then(|map| get_string(map, "prop:title"))
} else {
None
@@ -467,10 +452,7 @@ pub fn parse_doc_from_binary(doc_bin: Vec<u8>, doc_id: String) -> Result<CrawlRe
continue;
}
if matches!(
flavour.as_str(),
"affine:embed-linked-doc" | "affine:embed-synced-doc"
) {
if matches!(flavour.as_str(), "affine:embed-linked-doc" | "affine:embed-synced-doc") {
if let Some(page_id) = get_string(block, "prop:pageId") {
let mut info = build_block(None);
let payload = embed_ref_payload(block, &page_id);
@@ -515,11 +497,7 @@ pub fn parse_doc_from_binary(doc_bin: Vec<u8>, doc_id: String) -> Result<CrawlRe
&flavour,
parent_flavour.as_ref(),
parent_block_id.as_ref(),
compose_additional(
&display_mode,
note_block_id.as_ref(),
database_name.as_ref(),
),
compose_additional(&display_mode, note_block_id.as_ref(), database_name.as_ref()),
);
info.content = Some(texts);
blocks.push(info);
@@ -559,10 +537,7 @@ pub fn parse_doc_from_binary(doc_bin: Vec<u8>, doc_id: String) -> Result<CrawlRe
})
}
pub fn get_doc_ids_from_binary(
doc_bin: Vec<u8>,
include_trash: bool,
) -> Result<Vec<String>, ParseError> {
pub fn get_doc_ids_from_binary(doc_bin: Vec<u8>, include_trash: bool) -> Result<Vec<String>, ParseError> {
if doc_bin.is_empty() || doc_bin == [0, 0] {
return Err(ParseError::InvalidBinary);
}
@@ -695,10 +670,7 @@ fn compose_additional(
database_name: Option<&String>,
) -> Option<String> {
let mut payload = JsonMap::new();
payload.insert(
"displayMode".into(),
JsonValue::String(display_mode.to_string()),
);
payload.insert("displayMode".into(), JsonValue::String(display_mode.to_string()));
if let Some(note_id) = note_block_id {
payload.insert("noteBlockId".into(), JsonValue::String(note_id.clone()));
}
@@ -721,10 +693,7 @@ fn apply_doc_ref(info: &mut BlockInfo, page_id: String, payload: Option<String>)
}
fn embed_ref_payload(block: &Map, page_id: &str) -> Option<String> {
let params = block
.get("prop:params")
.as_ref()
.and_then(params_value_to_json);
let params = block.get("prop:params").as_ref().and_then(params_value_to_json);
Some(build_reference_payload(page_id, params))
}
@@ -746,10 +715,10 @@ fn gather_surface_texts(block: &Map) -> Vec<String> {
if let Some(value_map) = elements.get("value").and_then(|value| value.to_map()) {
for value in value_map.values() {
if let Some(element) = value.to_map() {
if let Some(text) = element.get("text").and_then(|value| value.to_text()) {
texts.push(text.to_string());
}
if let Some(element) = value.to_map()
&& let Some(text) = element.get("text").and_then(|value| value.to_text())
{
texts.push(text.to_string());
}
}
}
@@ -784,12 +753,12 @@ fn gather_database_texts(block: &Map) -> (Vec<String>, Option<String>) {
fn gather_table_contents(block: &Map) -> Vec<String> {
let mut contents = Vec::new();
for key in block.keys() {
if key.starts_with("prop:cells.") && key.ends_with(".text") {
if let Some(value) = block.get(key).and_then(|value| value_to_string(&value)) {
if !value.is_empty() {
contents.push(value);
}
}
if key.starts_with("prop:cells.")
&& key.ends_with(".text")
&& let Some(value) = block.get(key).and_then(|value| value_to_string(&value))
&& !value.is_empty()
{
contents.push(value);
}
}
contents
@@ -800,11 +769,7 @@ struct DatabaseTable {
rows: Vec<Vec<String>>,
}
fn build_database_table(
block: &Map,
context: &DocContext,
md_options: &DeltaToMdOptions,
) -> Option<DatabaseTable> {
fn build_database_table(block: &Map, context: &DocContext, md_options: &DeltaToMdOptions) -> Option<DatabaseTable> {
let columns = parse_database_columns(block)?;
let cells_map = block.get("prop:cells").and_then(|v| v.to_map())?;
let child_ids = collect_child_ids(block);
@@ -826,15 +791,14 @@ fn build_database_table(
cell_text = text;
}
}
} else if let Some(row_cells) = &row_cells {
if let Some(cell_val) = row_cells.get(&column.id).and_then(|v| v.to_map()) {
if let Some(value) = cell_val.get("value") {
if let Some(text_md) = delta_value_to_inline_markdown(&value, md_options) {
cell_text = text_md;
} else {
cell_text = format_cell_value(&value, column);
}
}
} else if let Some(row_cells) = &row_cells
&& let Some(cell_val) = row_cells.get(&column.id).and_then(|v| v.to_map())
&& let Some(value) = cell_val.get("value")
{
if let Some(text_md) = delta_value_to_inline_markdown(&value, md_options) {
cell_text = text_md;
} else {
cell_text = format_cell_value(&value, column);
}
}
@@ -852,26 +816,26 @@ fn append_database_summary(summary: &mut String, block: &Map, context: &DocConte
return;
};
if let Some(title) = get_string(block, "prop:title") {
if !title.is_empty() {
summary.push_str(&title);
summary.push('|');
}
if let Some(title) = get_string(block, "prop:title")
&& !title.is_empty()
{
summary.push_str(&title);
summary.push('|');
}
for column in table.columns.iter() {
if let Some(name) = column.name.as_ref() {
if !name.is_empty() {
summary.push_str(name);
summary.push('|');
}
if let Some(name) = column.name.as_ref()
&& !name.is_empty()
{
summary.push_str(name);
summary.push('|');
}
for option in column.options.iter() {
if let Some(value) = option.value.as_ref() {
if !value.is_empty() {
summary.push_str(value);
summary.push('|');
}
if let Some(value) = option.value.as_ref()
&& !value.is_empty()
{
summary.push_str(value);
summary.push('|');
}
}
}
@@ -920,9 +884,7 @@ struct DatabaseColumn {
}
fn parse_database_columns(block: &Map) -> Option<Vec<DatabaseColumn>> {
let columns = block
.get("prop:columns")
.and_then(|value| value.to_array())?;
let columns = block.get("prop:columns").and_then(|value| value.to_array())?;
let mut parsed = Vec::new();
for column_value in columns.iter() {
if let Some(column) = column_value.to_map() {
@@ -967,9 +929,7 @@ fn format_option_tag(option: &DatabaseOption) -> String {
let value = option.value.as_deref().unwrap_or_default();
let color = option.color.as_deref().unwrap_or_default();
format!(
"<span data-affine-option data-value=\"{id}\" data-option-color=\"{color}\">{value}</span>"
)
format!("<span data-affine-option data-value=\"{id}\" data-option-color=\"{color}\">{value}</span>")
}
fn format_cell_value(value: &Value, column: &DatabaseColumn) -> String {
@@ -991,15 +951,8 @@ fn format_cell_value(value: &Value, column: &DatabaseColumn) -> String {
}
"multi-select" => {
let ids: Vec<String> = match value {
Value::Any(Any::Array(ids)) => ids
.iter()
.filter_map(any_as_string)
.map(str::to_string)
.collect(),
Value::Array(array) => array
.iter()
.filter_map(|id_val| value_to_string(&id_val))
.collect(),
Value::Any(Any::Array(ids)) => ids.iter().filter_map(any_as_string).map(str::to_string).collect(),
Value::Array(array) => array.iter().filter_map(|id_val| value_to_string(&id_val)).collect(),
_ => Vec::new(),
};

View File

@@ -25,13 +25,13 @@ pub(super) fn build_block_index(blocks_map: &Map) -> BlockIndex {
let mut parent_lookup: HashMap<String, String> = HashMap::new();
for (_, value) in blocks_map.iter() {
if let Some(block_map) = value.to_map() {
if let Some(block_id) = get_block_id(&block_map) {
for child_id in collect_child_ids(&block_map) {
parent_lookup.insert(child_id, block_id.clone());
}
block_pool.insert(block_id, block_map);
if let Some(block_map) = value.to_map()
&& let Some(block_id) = get_block_id(&block_map)
{
for child_id in collect_child_ids(&block_map) {
parent_lookup.insert(child_id, block_id.clone());
}
block_pool.insert(block_id, block_map);
}
}
@@ -80,18 +80,13 @@ impl BlockWalker {
let mut child_ids = collect_child_ids(block);
for child_id in child_ids.drain(..).rev() {
if self.visited.insert(child_id.clone()) {
self
.queue
.push((Some(parent_block_id.to_string()), child_id));
self.queue.push((Some(parent_block_id.to_string()), child_id));
}
}
}
}
pub(super) fn find_block_id_by_flavour(
block_pool: &HashMap<String, Map>,
flavour: &str,
) -> Option<String> {
pub(super) fn find_block_id_by_flavour(block_pool: &HashMap<String, Map>, flavour: &str) -> Option<String> {
block_pool.iter().find_map(|(id, block)| {
get_flavour(block)
.filter(|block_flavour| block_flavour == flavour)
@@ -133,12 +128,12 @@ pub(super) fn get_list_depth(
let mut current_id = block_id.to_string();
while let Some(parent_id) = parent_lookup.get(&current_id) {
if let Some(parent_block) = blocks.get(parent_id) {
if get_flavour(parent_block).as_deref() == Some("affine:list") {
depth += 1;
current_id = parent_id.clone();
continue;
}
if let Some(parent_block) = blocks.get(parent_id)
&& get_flavour(parent_block).as_deref() == Some("affine:list")
{
depth += 1;
current_id = parent_id.clone();
continue;
}
break;
}
@@ -153,10 +148,10 @@ pub(super) fn nearest_by_flavour(
) -> Option<Map> {
let mut cursor = Some(start.to_string());
while let Some(node) = cursor {
if let Some(block) = blocks.get(&node) {
if get_flavour(block).as_deref() == Some(flavour) {
return Some(block.clone());
}
if let Some(block) = blocks.get(&node)
&& get_flavour(block).as_deref() == Some(flavour)
{
return Some(block.clone());
}
cursor = parent_lookup.get(&node).cloned();
}

View File

@@ -7,8 +7,7 @@ use std::{
use y_octo::{AHashMap, Any, Map, Text, TextAttributes, TextDeltaOp, TextInsert, Value};
use super::value::{
any_as_string, any_as_u64, any_truthy, build_reference_payload, params_any_map_to_json,
value_to_any,
any_as_string, any_as_u64, any_truthy, build_reference_payload, params_any_map_to_json, value_to_any,
};
#[derive(Debug, Clone)]
@@ -45,12 +44,7 @@ impl DeltaToMdOptions {
}
let mut parts = Vec::new();
parts.push(
reference
.ref_type
.clone()
.unwrap_or_else(|| "LinkedPage".into()),
);
parts.push(reference.ref_type.clone().unwrap_or_else(|| "LinkedPage".into()));
parts.push(reference.page_id.clone());
if let Some(mode) = reference.mode.as_ref() {
parts.push(mode.clone());
@@ -60,22 +54,14 @@ impl DeltaToMdOptions {
}
}
pub(super) fn text_to_markdown(
block: &Map,
key: &str,
options: &DeltaToMdOptions,
) -> Option<String> {
pub(super) fn text_to_markdown(block: &Map, key: &str, options: &DeltaToMdOptions) -> Option<String> {
block
.get(key)
.and_then(|value| value.to_text())
.map(|text| delta_to_markdown(&text, options))
}
pub(super) fn text_to_inline_markdown(
block: &Map,
key: &str,
options: &DeltaToMdOptions,
) -> Option<String> {
pub(super) fn text_to_inline_markdown(block: &Map, key: &str, options: &DeltaToMdOptions) -> Option<String> {
block
.get(key)
.and_then(|value| value.to_text())
@@ -89,8 +75,7 @@ pub(super) fn extract_inline_references(delta: &[TextDeltaOp]) -> Vec<InlineRefe
for op in delta {
let attrs = match op {
TextDeltaOp::Insert {
format: Some(format),
..
format: Some(format), ..
} => format,
_ => continue,
};
@@ -123,10 +108,7 @@ fn parse_inline_reference(value: &Any) -> Option<InlineReference> {
_ => return None,
};
let page_id = map
.get("pageId")
.and_then(any_as_string)
.map(str::to_string)?;
let page_id = map.get("pageId").and_then(any_as_string).map(str::to_string)?;
let title = map.get("title").and_then(any_as_string).map(str::to_string);
let ref_type = map.get("type").and_then(any_as_string).map(str::to_string);
let params = map.get("params").and_then(|value| match value {
@@ -161,20 +143,12 @@ fn delta_to_inline_markdown(text: &Text, options: &DeltaToMdOptions) -> String {
delta_to_markdown_with_options(&text.to_delta(), options, false)
}
fn delta_to_markdown_with_options(
delta: &[TextDeltaOp],
options: &DeltaToMdOptions,
trailing_newline: bool,
) -> String {
fn delta_to_markdown_with_options(delta: &[TextDeltaOp], options: &DeltaToMdOptions, trailing_newline: bool) -> String {
let ops = build_delta_ops(delta);
delta_ops_to_markdown_with_options(&ops, options, trailing_newline)
}
fn delta_ops_to_markdown_with_options(
ops: &[DeltaOp],
options: &DeltaToMdOptions,
trailing_newline: bool,
) -> String {
fn delta_ops_to_markdown_with_options(ops: &[DeltaOp], options: &DeltaToMdOptions, trailing_newline: bool) -> String {
let root = convert_delta_ops(ops, options);
let mut rendered = render_node(&root);
rendered = rendered.trim_end().to_string();
@@ -235,10 +209,7 @@ fn delta_op_from_any(value: &Any) -> Option<DeltaOp> {
_ => DeltaInsert::Embed(vec![insert_value.clone()]),
};
let attributes = map
.get("attributes")
.and_then(any_to_attributes)
.unwrap_or_default();
let attributes = map.get("attributes").and_then(any_to_attributes).unwrap_or_default();
Some(DeltaOp { insert, attributes })
}
@@ -260,10 +231,7 @@ fn delta_any_to_inline_markdown(value: &Any, options: &DeltaToMdOptions) -> Opti
delta_ops_from_any(value).map(|ops| delta_ops_to_markdown_with_options(&ops, options, false))
}
pub(super) fn delta_value_to_inline_markdown(
value: &Value,
options: &DeltaToMdOptions,
) -> Option<String> {
pub(super) fn delta_value_to_inline_markdown(value: &Value, options: &DeltaToMdOptions) -> Option<String> {
if let Some(text) = value.to_text() {
return Some(delta_to_inline_markdown(&text, options));
}
@@ -428,13 +396,7 @@ fn convert_delta_ops(ops: &[DeltaOp], options: &DeltaToMdOptions) -> Rc<RefCell<
}
}
apply_inline_attributes(
&mut el,
&op.attributes,
next_attrs,
&mut active_inline,
options,
);
apply_inline_attributes(&mut el, &op.attributes, next_attrs, &mut active_inline, options);
Node::append(&el, Node::new_text(segment));
if line_index + 1 < lines.len() {
new_line(&root, &mut line, &mut el, &mut active_inline);
@@ -504,10 +466,10 @@ fn apply_inline_attributes(
if !is_inline_attribute(attr) || !any_truthy(value) {
continue;
}
if let Some(active) = active_inline.get(attr) {
if active == value {
continue;
}
if let Some(active) = active_inline.get(attr)
&& active == value
{
continue;
}
let next_matches = next
@@ -532,11 +494,7 @@ fn apply_inline_attributes(
}
}
fn inline_node_for_attr(
attr: &str,
attrs: &TextAttributes,
options: &DeltaToMdOptions,
) -> Option<Rc<RefCell<Node>>> {
fn inline_node_for_attr(attr: &str, attrs: &TextAttributes, options: &DeltaToMdOptions) -> Option<Rc<RefCell<Node>>> {
match attr {
"italic" => Some(Node::new_inline("_", "_")),
"bold" => Some(Node::new_inline("**", "**")),
@@ -544,13 +502,10 @@ fn inline_node_for_attr(
.get(attr)
.and_then(any_as_string)
.map(|url| Node::new_inline("[", &format!("]({url})"))),
"reference" => attrs
.get(attr)
.and_then(parse_inline_reference)
.map(|reference| {
let (title, link) = options.build_reference_link(&reference);
Node::new_inline("[", &format!("{title}]({link})"))
}),
"reference" => attrs.get(attr).and_then(parse_inline_reference).map(|reference| {
let (title, link) = options.build_reference_link(&reference);
Node::new_inline("[", &format!("{title}]({link})"))
}),
"strike" => Some(Node::new_inline("~~", "~~")),
"code" => Some(Node::new_inline("`", "`")),
_ => None,
@@ -562,10 +517,7 @@ fn has_block_level_attribute(attrs: &TextAttributes) -> bool {
}
fn is_inline_attribute(attr: &str) -> bool {
matches!(
attr,
"italic" | "bold" | "link" | "reference" | "strike" | "code"
)
matches!(attr, "italic" | "bold" | "link" | "reference" | "strike" | "code")
}
fn encode_link(link: &str) -> String {
@@ -683,9 +635,7 @@ impl Node {
fn append(parent: &Rc<RefCell<Node>>, child: Rc<RefCell<Node>>) {
if let Some(old_parent) = child.borrow().parent.as_ref().and_then(|p| p.upgrade()) {
let mut old_parent = old_parent.borrow_mut();
old_parent
.children
.retain(|existing| !Rc::ptr_eq(existing, &child));
old_parent.children.retain(|existing| !Rc::ptr_eq(existing, &child));
}
child.borrow_mut().parent = Some(Rc::downgrade(parent));

View File

@@ -4,7 +4,6 @@ mod delta_markdown;
mod value;
pub use affine::{
get_doc_ids_from_binary, parse_doc_from_binary, parse_doc_to_markdown, parse_page_doc,
parse_workspace_doc, BlockInfo, CrawlResult, MarkdownResult, PageDocContent, ParseError,
WorkspaceDocContent,
BlockInfo, CrawlResult, MarkdownResult, PageDocContent, ParseError, WorkspaceDocContent, get_doc_ids_from_binary,
parse_doc_from_binary, parse_doc_to_markdown, parse_page_doc, parse_workspace_doc,
};

View File

@@ -34,11 +34,7 @@ impl Stamp {
}
pub fn check<S: AsRef<str>>(&self, bits: u32, resource: S) -> bool {
if self.version == "1"
&& bits <= self.claim
&& self.check_expiration()
&& self.resource == resource.as_ref()
{
if self.version == "1" && bits <= self.claim && self.check_expiration() && self.resource == resource.as_ref() {
let hex_digits = ((self.claim as f32) / 4.).floor() as usize;
// check challenge
@@ -64,12 +60,7 @@ impl Stamp {
let now = Utc::now();
let ts = now.format("%Y%m%d%H%M%S");
let bits = bits.unwrap_or(20);
let rand = String::from_iter(
Alphanumeric
.sample_iter(rng())
.take(SALT_LENGTH)
.map(char::from),
);
let rand = String::from_iter(Alphanumeric.sample_iter(rng()).take(SALT_LENGTH).map(char::from));
let challenge = format!("{}:{}:{}:{}:{}:{}", version, bits, ts, &resource, "", rand);
Stamp {
@@ -102,22 +93,12 @@ impl TryFrom<&str> for Stamp {
fn try_from(value: &str) -> Result<Self, Self::Error> {
let stamp_vec = value.split(':').collect::<Vec<&str>>();
if stamp_vec.len() != 7
|| stamp_vec
.iter()
.enumerate()
.any(|(i, s)| i != 4 && s.is_empty())
{
return Err(format!(
"Malformed stamp, expected 6 parts, got {}",
stamp_vec.len()
));
if stamp_vec.len() != 7 || stamp_vec.iter().enumerate().any(|(i, s)| i != 4 && s.is_empty()) {
return Err(format!("Malformed stamp, expected 6 parts, got {}", stamp_vec.len()));
}
Ok(Stamp {
version: stamp_vec[0].to_string(),
claim: stamp_vec[1]
.parse()
.map_err(|_| "Malformed stamp".to_string())?,
claim: stamp_vec[1].parse().map_err(|_| "Malformed stamp".to_string())?,
ts: stamp_vec[2].to_string(),
resource: stamp_vec[3].to_string(),
ext: stamp_vec[4].to_string(),
@@ -129,7 +110,7 @@ impl TryFrom<&str> for Stamp {
#[cfg(test)]
mod tests {
use rand::{distr::Alphanumeric, Rng};
use rand::{Rng, distr::Alphanumeric};
use rayon::prelude::*;
use super::Stamp;
@@ -139,9 +120,7 @@ mod tests {
{
let response = Stamp::mint("test".into(), Some(20)).format();
assert!(
Stamp::try_from(response.as_str())
.unwrap()
.check(20, "test"),
Stamp::try_from(response.as_str()).unwrap().check(20, "test"),
"should pass"
);
}
@@ -149,18 +128,14 @@ mod tests {
{
let response = Stamp::mint("test".into(), Some(19)).format();
assert!(
!Stamp::try_from(response.as_str())
.unwrap()
.check(20, "test"),
!Stamp::try_from(response.as_str()).unwrap().check(20, "test"),
"should fail with lower bits"
);
}
{
let response = Stamp::mint("test".into(), Some(20)).format();
assert!(
!Stamp::try_from(response.as_str())
.unwrap()
.check(20, "test2"),
!Stamp::try_from(response.as_str()).unwrap().check(20, "test2"),
"should fail with different resource"
);
}
@@ -177,10 +152,7 @@ mod tests {
let response = Stamp::mint("test".into(), Some(20));
assert_eq!(
response.format(),
format!(
"1:20:{}:test::{}:{}",
response.ts, response.rand, response.counter
)
format!("1:20:{}:test::{}:{}", response.ts, response.rand, response.counter)
);
}
@@ -195,9 +167,7 @@ mod tests {
.collect::<String>();
let response = Stamp::mint(resource.clone(), Some(bit)).format();
assert!(
Stamp::try_from(response.as_str())
.unwrap()
.check(bit, resource),
Stamp::try_from(response.as_str()).unwrap().check(bit, resource),
"should pass"
);
});