feat(native): native reader for indexer (#14055)

This commit is contained in:
DarkSky
2025-12-07 16:22:11 +08:00
committed by GitHub
parent 69cdeedc4e
commit cf4e37c584
28 changed files with 1376 additions and 569 deletions

View File

@@ -10,11 +10,13 @@ crate-type = ["cdylib", "rlib"]
use-as-lib = ["napi-derive/noop", "napi/noop"]
[dependencies]
affine_common = { workspace = true, features = ["ydoc-loader"] }
affine_schema = { path = "../schema" }
anyhow = { workspace = true }
chrono = { workspace = true }
napi = { workspace = true }
napi-derive = { workspace = true }
serde = { workspace = true, features = ["derive"] }
sqlx = { workspace = true, default-features = false, features = [
"chrono",
"macros",
@@ -25,6 +27,7 @@ sqlx = { workspace = true, default-features = false, features = [
] }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["full"] }
y-octo = { workspace = true }
[target.'cfg(any(target_os = "ios", target_os = "android"))'.dependencies]
uniffi = { workspace = true }
@@ -42,3 +45,8 @@ sqlx = { workspace = true, default-features = false, features = [
"tls-rustls",
] }
tokio = { workspace = true, features = ["full"] }
[dev-dependencies]
serde_json = { workspace = true }
uuid = { workspace = true, features = ["v4"] }

View File

@@ -103,7 +103,7 @@ impl SqliteDocStorage {
sqlx::query(r#"INSERT INTO updates (doc_id, data, created_at) VALUES ($1, $2, $3);"#)
.bind(doc_id)
.bind(update.as_ref())
.bind(update)
.bind(timestamp)
.execute(&mut *tx)
.await?;
@@ -358,7 +358,7 @@ mod tests {
assert_eq!(result.len(), 4);
assert_eq!(
result.iter().map(|u| u.bin.as_ref()).collect::<Vec<_>>(),
result.iter().map(|u| u.bin.to_vec()).collect::<Vec<_>>(),
updates
);
}
@@ -382,7 +382,7 @@ mod tests {
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
assert!(result.is_some());
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
}
#[tokio::test]
@@ -400,7 +400,7 @@ mod tests {
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
assert!(result.is_some());
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
let snapshot = DocRecord {
doc_id: "test".to_string(),
@@ -416,7 +416,7 @@ mod tests {
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
assert!(result.is_some());
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
}
#[tokio::test]

View File

@@ -1,3 +1,5 @@
use affine_common::doc_parser::ParseError;
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, thiserror::Error)]
@@ -8,4 +10,6 @@ pub enum Error {
MigrateError(#[from] sqlx::migrate::MigrateError),
#[error("Invalid operation")]
InvalidOperation,
#[error(transparent)]
Parse(#[from] ParseError),
}

View File

@@ -0,0 +1,180 @@
use affine_common::doc_parser::{parse_doc_from_binary, BlockInfo, CrawlResult, ParseError};
use napi_derive::napi;
use serde::Serialize;
use y_octo::DocOptions;
use super::{error::Result, storage::SqliteDocStorage};
/// Plain-data description of a single block, exported as an object through
/// `#[napi(object)]` and serializable with serde.
///
/// Every field is copied as-is from `affine_common::doc_parser::BlockInfo`
/// by the `From<BlockInfo>` impl in this file.
#[napi(object)]
#[derive(Debug, Serialize)]
pub struct NativeBlockInfo {
/// Identifier of the block, taken from `BlockInfo::block_id`.
pub block_id: String,
/// Flavour string of the block — presumably its type tag; confirm against the parser.
pub flavour: String,
/// Optional list of strings taken from `BlockInfo::content` (presumably text content — TODO confirm).
pub content: Option<Vec<String>>,
/// Optional list of strings taken from `BlockInfo::blob` (presumably blob references — TODO confirm).
pub blob: Option<Vec<String>>,
/// Optional list of strings taken from `BlockInfo::ref_doc_id` (presumably ids of referenced docs — TODO confirm).
pub ref_doc_id: Option<Vec<String>>,
/// Optional list of strings taken from `BlockInfo::ref_info`.
/// NOTE(review): the format of each entry is not visible here — confirm against the parser.
pub ref_info: Option<Vec<String>>,
/// Taken from `BlockInfo::parent_flavour` — presumably the parent block's flavour, when there is one.
pub parent_flavour: Option<String>,
/// Taken from `BlockInfo::parent_block_id` — presumably the parent block's id, when there is one.
pub parent_block_id: Option<String>,
/// Optional extra payload taken from `BlockInfo::additional`.
/// NOTE(review): encoding of this string is not visible here — confirm against the parser.
pub additional: Option<String>,
}
/// Result of crawling one document, exported as an object through
/// `#[napi(object)]` and serializable with serde.
///
/// Built from `affine_common::doc_parser::CrawlResult` by the
/// `From<CrawlResult>` impl in this file.
#[napi(object)]
#[derive(Debug, Serialize)]
pub struct NativeCrawlResult {
/// Blocks reported by the parser, each converted to a `NativeBlockInfo`.
pub blocks: Vec<NativeBlockInfo>,
/// Title string copied from `CrawlResult::title`.
pub title: String,
/// Summary string copied from `CrawlResult::summary`.
pub summary: String,
}
impl From<BlockInfo> for NativeBlockInfo {
fn from(value: BlockInfo) -> Self {
Self {
block_id: value.block_id,
flavour: value.flavour,
content: value.content,
blob: value.blob,
ref_doc_id: value.ref_doc_id,
ref_info: value.ref_info,
parent_flavour: value.parent_flavour,
parent_block_id: value.parent_block_id,
additional: value.additional,
}
}
}
impl From<CrawlResult> for NativeCrawlResult {
fn from(value: CrawlResult) -> Self {
Self {
blocks: value.blocks.into_iter().map(Into::into).collect(),
title: value.title,
summary: value.summary,
}
}
}
impl SqliteDocStorage {
pub async fn crawl_doc_data(&self, doc_id: &str) -> Result<NativeCrawlResult> {
let doc_bin = self
.load_doc_binary(doc_id)
.await?
.ok_or(ParseError::DocNotFound)?;
let result = parse_doc_from_binary(doc_bin, doc_id.to_string())?;
Ok(result.into())
}
async fn load_doc_binary(&self, doc_id: &str) -> Result<Option<Vec<u8>>> {
let snapshot = self.get_doc_snapshot(doc_id.to_string()).await?;
let mut updates = self.get_doc_updates(doc_id.to_string()).await?;
if snapshot.is_none() && updates.is_empty() {
return Ok(None);
}
updates.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
let mut segments =
Vec::with_capacity(snapshot.as_ref().map(|_| 1).unwrap_or(0) + updates.len());
if let Some(record) = snapshot {
segments.push(record.bin.to_vec());
}
segments.extend(updates.into_iter().map(|update| update.bin.to_vec()));
merge_updates(segments, doc_id).map(Some)
}
}
/// Combines binary segments into a single v1-encoded update.
///
/// * No segments: fails with `ParseError::DocNotFound`.
/// * Exactly one segment: that segment is returned as-is, without decoding.
/// * Otherwise: each segment is applied, in order, to a fresh document built
///   with `guid`, and the document is re-encoded.
///
/// # Errors
///
/// `ParseError::InvalidBinary` when a segment cannot be applied, and
/// `ParseError::ParserError` carrying the encoder's message when re-encoding
/// fails; both are converted into this crate's error type.
fn merge_updates(mut segments: Vec<Vec<u8>>, guid: &str) -> Result<Vec<u8>> {
  match segments.len() {
    0 => return Err(ParseError::DocNotFound.into()),
    // Nothing to merge; hand the only segment back untouched.
    1 => return segments.pop().ok_or_else(|| ParseError::DocNotFound.into()),
    _ => {}
  }

  let mut doc = DocOptions::new().with_guid(guid.to_string()).build();
  for segment in &segments {
    // The underlying decode error is intentionally collapsed into `InvalidBinary`.
    doc
      .apply_update_from_binary_v1(segment)
      .map_err(|_| ParseError::InvalidBinary)?;
  }

  doc
    .encode_update_v1()
    .map_err(|err| ParseError::ParserError(err.to_string()).into())
}
#[cfg(test)]
mod tests {
  use std::path::{Path, PathBuf};

  use affine_common::doc_parser::ParseError;
  use chrono::Utc;
  use serde_json::Value;
  use tokio::fs;
  use uuid::Uuid;

  use super::{super::error::Error, *};

  const DEMO_BIN: &[u8] = include_bytes!("../../../../common/native/fixtures/demo.ydoc");
  const DEMO_JSON: &[u8] = include_bytes!("../../../../common/native/fixtures/demo.ydoc.json");

  /// Builds a unique directory path under the system temp dir; the directory
  /// itself is not created here.
  fn temp_workspace_dir() -> PathBuf {
    let dir_name = format!("affine-native-{}", Uuid::new_v4());
    std::env::temp_dir().join(dir_name)
  }

  /// Makes sure the parent directory of `path` exists, then opens a storage at
  /// `path` and connects it.
  async fn init_db(path: &Path) -> SqliteDocStorage {
    let parent = path.parent().unwrap();
    fs::create_dir_all(parent).await.unwrap();

    let db = SqliteDocStorage::new(path.to_string_lossy().into_owned());
    db.connect().await.unwrap();
    db
  }

  /// Best-effort removal of the directory containing `path`; failures are ignored.
  async fn cleanup(path: &Path) {
    let parent = path.parent().unwrap();
    let _ = fs::remove_dir_all(parent).await;
  }

  /// Creates a fresh temp workspace and returns a connected storage together
  /// with the path of its database file.
  async fn fresh_storage() -> (SqliteDocStorage, PathBuf) {
    let base = temp_workspace_dir();
    fs::create_dir_all(&base).await.unwrap();
    let db_path = base.join("storage.db");
    let storage = init_db(&db_path).await;
    (storage, db_path)
  }

  /// Crawling the demo snapshot must serialize to exactly the JSON fixture.
  #[tokio::test]
  async fn parse_demo_snapshot_matches_fixture() {
    let (storage, db_path) = fresh_storage().await;

    sqlx::query(r#"INSERT INTO snapshots (doc_id, data, updated_at) VALUES (?, ?, ?)"#)
      .bind("demo-doc")
      .bind(DEMO_BIN)
      .bind(Utc::now().naive_utc())
      .execute(&storage.pool)
      .await
      .unwrap();

    let crawled = storage.crawl_doc_data("demo-doc").await.unwrap();
    let expected: Value = serde_json::from_slice(DEMO_JSON).unwrap();
    let actual = serde_json::to_value(&crawled).unwrap();
    assert_eq!(expected, actual);

    storage.close().await;
    cleanup(&db_path).await;
  }

  /// Crawling a document with no stored data must report `DocNotFound`.
  #[tokio::test]
  async fn missing_doc_returns_error() {
    let (storage, db_path) = fresh_storage().await;

    let failure = storage.crawl_doc_data("absent-doc").await.unwrap_err();
    assert!(matches!(failure, Error::Parse(ParseError::DocNotFound)));

    storage.close().await;
    cleanup(&db_path).await;
  }
}

View File

@@ -3,6 +3,7 @@ pub mod blob_sync;
pub mod doc;
pub mod doc_sync;
pub mod error;
pub mod indexer;
pub mod pool;
pub mod storage;
@@ -117,6 +118,20 @@ impl DocStoragePool {
Ok(())
}
/// Fetches the storage returned by `self.get(universal_id)` and crawls
/// `doc_id` from it, returning the parsed blocks, title and summary.
///
/// Errors from obtaining the storage or from crawling are propagated.
#[napi]
pub async fn crawl_doc_data(
  &self,
  universal_id: String,
  doc_id: String,
) -> Result<indexer::NativeCrawlResult> {
  let storage = self.get(universal_id).await?;
  let crawled = storage.crawl_doc_data(&doc_id).await?;
  Ok(crawled)
}
#[napi]
pub async fn set_space_id(&self, universal_id: String, space_id: String) -> Result<()> {
self.get(universal_id).await?.set_space_id(space_id).await?;