mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-25 10:22:55 +08:00
feat(native): native reader for indexer (#14055)
This commit is contained in:
@@ -10,11 +10,13 @@ crate-type = ["cdylib", "rlib"]
|
||||
use-as-lib = ["napi-derive/noop", "napi/noop"]
|
||||
|
||||
[dependencies]
|
||||
affine_common = { workspace = true, features = ["ydoc-loader"] }
|
||||
affine_schema = { path = "../schema" }
|
||||
anyhow = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
napi = { workspace = true }
|
||||
napi-derive = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
@@ -25,6 +27,7 @@ sqlx = { workspace = true, default-features = false, features = [
|
||||
] }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
y-octo = { workspace = true }
|
||||
|
||||
[target.'cfg(any(target_os = "ios", target_os = "android"))'.dependencies]
|
||||
uniffi = { workspace = true }
|
||||
@@ -42,3 +45,8 @@ sqlx = { workspace = true, default-features = false, features = [
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
serde_json = { workspace = true }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
|
||||
@@ -103,7 +103,7 @@ impl SqliteDocStorage {
|
||||
|
||||
sqlx::query(r#"INSERT INTO updates (doc_id, data, created_at) VALUES ($1, $2, $3);"#)
|
||||
.bind(doc_id)
|
||||
.bind(update.as_ref())
|
||||
.bind(update)
|
||||
.bind(timestamp)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
@@ -358,7 +358,7 @@ mod tests {
|
||||
|
||||
assert_eq!(result.len(), 4);
|
||||
assert_eq!(
|
||||
result.iter().map(|u| u.bin.as_ref()).collect::<Vec<_>>(),
|
||||
result.iter().map(|u| u.bin.to_vec()).collect::<Vec<_>>(),
|
||||
updates
|
||||
);
|
||||
}
|
||||
@@ -382,7 +382,7 @@ mod tests {
|
||||
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
|
||||
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
|
||||
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -400,7 +400,7 @@ mod tests {
|
||||
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
|
||||
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
|
||||
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
|
||||
|
||||
let snapshot = DocRecord {
|
||||
doc_id: "test".to_string(),
|
||||
@@ -416,7 +416,7 @@ mod tests {
|
||||
let result = storage.get_doc_snapshot("test".to_string()).await.unwrap();
|
||||
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap().bin.as_ref(), vec![0, 0]);
|
||||
assert_eq!(result.unwrap().bin.to_vec(), vec![0, 0]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use affine_common::doc_parser::ParseError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -8,4 +10,6 @@ pub enum Error {
|
||||
MigrateError(#[from] sqlx::migrate::MigrateError),
|
||||
#[error("Invalid operation")]
|
||||
InvalidOperation,
|
||||
#[error(transparent)]
|
||||
Parse(#[from] ParseError),
|
||||
}
|
||||
|
||||
180
packages/frontend/native/nbstore/src/indexer.rs
Normal file
180
packages/frontend/native/nbstore/src/indexer.rs
Normal file
@@ -0,0 +1,180 @@
|
||||
use affine_common::doc_parser::{parse_doc_from_binary, BlockInfo, CrawlResult, ParseError};
|
||||
use napi_derive::napi;
|
||||
use serde::Serialize;
|
||||
use y_octo::DocOptions;
|
||||
|
||||
use super::{error::Result, storage::SqliteDocStorage};
|
||||
|
||||
#[napi(object)]
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct NativeBlockInfo {
|
||||
pub block_id: String,
|
||||
pub flavour: String,
|
||||
pub content: Option<Vec<String>>,
|
||||
pub blob: Option<Vec<String>>,
|
||||
pub ref_doc_id: Option<Vec<String>>,
|
||||
pub ref_info: Option<Vec<String>>,
|
||||
pub parent_flavour: Option<String>,
|
||||
pub parent_block_id: Option<String>,
|
||||
pub additional: Option<String>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct NativeCrawlResult {
|
||||
pub blocks: Vec<NativeBlockInfo>,
|
||||
pub title: String,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
impl From<BlockInfo> for NativeBlockInfo {
|
||||
fn from(value: BlockInfo) -> Self {
|
||||
Self {
|
||||
block_id: value.block_id,
|
||||
flavour: value.flavour,
|
||||
content: value.content,
|
||||
blob: value.blob,
|
||||
ref_doc_id: value.ref_doc_id,
|
||||
ref_info: value.ref_info,
|
||||
parent_flavour: value.parent_flavour,
|
||||
parent_block_id: value.parent_block_id,
|
||||
additional: value.additional,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CrawlResult> for NativeCrawlResult {
|
||||
fn from(value: CrawlResult) -> Self {
|
||||
Self {
|
||||
blocks: value.blocks.into_iter().map(Into::into).collect(),
|
||||
title: value.title,
|
||||
summary: value.summary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteDocStorage {
|
||||
pub async fn crawl_doc_data(&self, doc_id: &str) -> Result<NativeCrawlResult> {
|
||||
let doc_bin = self
|
||||
.load_doc_binary(doc_id)
|
||||
.await?
|
||||
.ok_or(ParseError::DocNotFound)?;
|
||||
|
||||
let result = parse_doc_from_binary(doc_bin, doc_id.to_string())?;
|
||||
Ok(result.into())
|
||||
}
|
||||
|
||||
async fn load_doc_binary(&self, doc_id: &str) -> Result<Option<Vec<u8>>> {
|
||||
let snapshot = self.get_doc_snapshot(doc_id.to_string()).await?;
|
||||
let mut updates = self.get_doc_updates(doc_id.to_string()).await?;
|
||||
|
||||
if snapshot.is_none() && updates.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
updates.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
|
||||
|
||||
let mut segments =
|
||||
Vec::with_capacity(snapshot.as_ref().map(|_| 1).unwrap_or(0) + updates.len());
|
||||
if let Some(record) = snapshot {
|
||||
segments.push(record.bin.to_vec());
|
||||
}
|
||||
segments.extend(updates.into_iter().map(|update| update.bin.to_vec()));
|
||||
|
||||
merge_updates(segments, doc_id).map(Some)
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_updates(mut segments: Vec<Vec<u8>>, guid: &str) -> Result<Vec<u8>> {
|
||||
if segments.is_empty() {
|
||||
return Err(ParseError::DocNotFound.into());
|
||||
}
|
||||
|
||||
if segments.len() == 1 {
|
||||
return segments.pop().ok_or(ParseError::DocNotFound.into());
|
||||
}
|
||||
|
||||
let mut doc = DocOptions::new().with_guid(guid.to_string()).build();
|
||||
for update in segments.iter() {
|
||||
doc
|
||||
.apply_update_from_binary_v1(update)
|
||||
.map_err(|_| ParseError::InvalidBinary)?;
|
||||
}
|
||||
|
||||
let buffer = doc
|
||||
.encode_update_v1()
|
||||
.map_err(|err| ParseError::ParserError(err.to_string()))?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
  use std::path::{Path, PathBuf};

  use affine_common::doc_parser::ParseError;
  use chrono::Utc;
  use serde_json::Value;
  use tokio::fs;
  use uuid::Uuid;

  use super::{super::error::Error, *};

  // Binary fixture and the JSON the crawler is expected to produce for it.
  const DEMO_BIN: &[u8] = include_bytes!("../../../../common/native/fixtures/demo.ydoc");
  const DEMO_JSON: &[u8] = include_bytes!("../../../../common/native/fixtures/demo.ydoc.json");

  // Unique per-call directory under the system temp dir (not created here).
  fn temp_workspace_dir() -> PathBuf {
    let mut dir = std::env::temp_dir();
    dir.push(format!("affine-native-{}", Uuid::new_v4()));
    dir
  }

  // Ensures the parent directory of `path` exists, then opens and connects a
  // storage backed by that file.
  async fn init_db(path: &Path) -> SqliteDocStorage {
    let parent = path.parent().unwrap();
    fs::create_dir_all(parent).await.unwrap();

    let storage = SqliteDocStorage::new(path.to_string_lossy().into_owned());
    storage.connect().await.unwrap();
    storage
  }

  // Best-effort removal of the directory holding `path`; failures are
  // deliberately ignored.
  async fn cleanup(path: &Path) {
    let parent = path.parent().unwrap();
    let _ = fs::remove_dir_all(parent).await;
  }

  // Shared setup: a connected storage inside a brand-new temp workspace,
  // together with the path of its database file.
  async fn fresh_storage() -> (SqliteDocStorage, PathBuf) {
    let workspace = temp_workspace_dir();
    fs::create_dir_all(&workspace).await.unwrap();
    let db_path = workspace.join("storage.db");

    let storage = init_db(&db_path).await;
    (storage, db_path)
  }

  #[tokio::test]
  async fn parse_demo_snapshot_matches_fixture() {
    let (storage, db_path) = fresh_storage().await;

    // Seed the snapshot table directly with the demo fixture.
    sqlx::query(r#"INSERT INTO snapshots (doc_id, data, updated_at) VALUES (?, ?, ?)"#)
      .bind("demo-doc")
      .bind(DEMO_BIN)
      .bind(Utc::now().naive_utc())
      .execute(&storage.pool)
      .await
      .unwrap();

    let crawled = storage.crawl_doc_data("demo-doc").await.unwrap();

    let expected: Value = serde_json::from_slice(DEMO_JSON).unwrap();
    let actual = serde_json::to_value(&crawled).unwrap();
    assert_eq!(expected, actual);

    storage.close().await;
    cleanup(&db_path).await;
  }

  #[tokio::test]
  async fn missing_doc_returns_error() {
    let (storage, db_path) = fresh_storage().await;

    // Nothing was inserted, so the crawl must report a missing document.
    let failure = storage.crawl_doc_data("absent-doc").await.unwrap_err();
    assert!(matches!(failure, Error::Parse(ParseError::DocNotFound)));

    storage.close().await;
    cleanup(&db_path).await;
  }
}
|
||||
@@ -3,6 +3,7 @@ pub mod blob_sync;
|
||||
pub mod doc;
|
||||
pub mod doc_sync;
|
||||
pub mod error;
|
||||
pub mod indexer;
|
||||
pub mod pool;
|
||||
pub mod storage;
|
||||
|
||||
@@ -117,6 +118,20 @@ impl DocStoragePool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn crawl_doc_data(
|
||||
&self,
|
||||
universal_id: String,
|
||||
doc_id: String,
|
||||
) -> Result<indexer::NativeCrawlResult> {
|
||||
let result = self
|
||||
.get(universal_id)
|
||||
.await?
|
||||
.crawl_doc_data(&doc_id)
|
||||
.await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn set_space_id(&self, universal_id: String, space_id: String) -> Result<()> {
|
||||
self.get(universal_id).await?.set_space_id(space_id).await?;
|
||||
|
||||
Reference in New Issue
Block a user