feat(y-octo): import y-octo monorepo (#11750)

This commit is contained in:
Brooooooklyn
2025-04-21 02:51:15 +00:00
parent e3973538e8
commit 95dbda24fc
127 changed files with 17319 additions and 18 deletions

View File

@@ -0,0 +1,95 @@
[package]
authors = [
"DarkSky <darksky2048@gmail.com>",
"forehalo <forehalo@gmail.com>",
"x1a0t <405028157@qq.com>",
"Brooklyn <lynweklm@gmail.com>",
]
description = "High-performance and thread-safe CRDT implementation compatible with Yjs"
edition = "2021"
homepage = "https://github.com/toeverything/y-octo"
include = ["src/**/*", "benches/**/*", "bin/**/*", "LICENSE", "README.md"]
keywords = ["collaboration", "crdt", "crdts", "yjs", "yata"]
license = "MIT"
name = "y-octo"
readme = "README.md"
repository = "https://github.com/toeverything/y-octo"
version = "0.0.1"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ahash = { workspace = true }
bitvec = { workspace = true }
byteorder = { workspace = true }
lasso = { workspace = true }
log = { workspace = true }
nanoid = { workspace = true }
nom = { workspace = true }
ordered-float = { workspace = true }
rand = { workspace = true }
rand_chacha = { workspace = true }
rand_distr = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
smol_str = { workspace = true }
thiserror = { workspace = true }
[features]
bench = []
debug = []
large_refs = []
serde_json = []
[target.'cfg(fuzzing)'.dependencies]
arbitrary = { workspace = true }
ordered-float = { workspace = true, features = ["arbitrary"] }
[target.'cfg(loom)'.dependencies]
loom = { workspace = true }
# override the dev-dependencies feature
async-lock = { workspace = true }
[dev-dependencies]
assert-json-diff = { workspace = true }
criterion = { workspace = true }
lib0 = { workspace = true }
ordered-float = { workspace = true, features = ["proptest"] }
path-ext = { workspace = true }
proptest = { workspace = true }
proptest-derive = { workspace = true }
yrs = { workspace = true }
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(debug)',
'cfg(fuzzing)',
'cfg(loom)',
] }
[[bench]]
harness = false
name = "array_ops_benchmarks"
[[bench]]
harness = false
name = "codec_benchmarks"
[[bench]]
harness = false
name = "map_ops_benchmarks"
[[bench]]
harness = false
name = "text_ops_benchmarks"
[[bench]]
harness = false
name = "apply_benchmarks"
[[bench]]
harness = false
name = "update_benchmarks"
[lib]
bench = true

View File

@@ -0,0 +1,9 @@
The MIT License (MIT)
Copyright (c) 2022-present TOEVERYTHING PTE. LTD. and its affiliates.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,100 @@
# Y-Octo
[![test](https://github.com/toeverything/y-octo/actions/workflows/y-octo.yml/badge.svg)](https://github.com/toeverything/y-octo/actions/workflows/y-octo.yml)
[![docs]](https://docs.rs/y-octo/latest/y_octo)
[![crates]](https://crates.io/crates/y-octo)
[![codecov]](https://codecov.io/gh/toeverything/y-octo)
Y-Octo is a high-performance CRDT implementation compatible with [yjs].
## Introduction
Y-Octo is a tiny, ultra-fast CRDT collaboration library built for all major platforms. Developers can use Y-Octo as the [Single source of truth](https://en.wikipedia.org/wiki/Single_source_of_truth) for their application state, naturally turning the application into a [local-first](https://www.inkandswitch.com/local-first/) collaborative app.
Y-Octo also has interoperability and binary compatibility with [yjs]. Developers can use [yjs] to develop local-first web applications and collaborate with Y-Octo in native apps alongside web apps.
## Who is using Y-Octo
<a href="https://affine.pro"><img src="./assets/affine.svg" /></a>
[AFFiNE](https://affine.pro) is using y-octo in production. There are [Electron](https://affine.pro/download) app and [Node.js server](https://github.com/toeverything/AFFiNE/tree/canary/packages/backend/native) using y-octo in production.
<a href="https://www.mysc.app/"><img src="https://www.mysc.app/images/logo_blk.webp" width="120px" /></a>
[Mysc](https://www.mysc.app/) is using y-octo in the Rust server, and the iOS/Android client via the Swift/Kotlin bindings (Official bindings coming soon).
## Features
- ✅ Collaborative Text
- ✅ Read and write styled Unicode compatible data.
- 🚧 Add, modify and delete text styles.
- 🚧 Embedded JS data types and collaborative types.
- ✅ Thread-safe collaborative types.
- Collaborative Array
- ✅ Add, modify, and delete basic JS data types.
- ✅ Recursively add, modify, and delete collaborative types.
- ✅ Thread-safe collaborative types.
- 🚧 Recursive event subscription
- Collaborative Map
- ✅ Add, modify, and delete basic JS data types.
- ✅ Recursively add, modify, and delete collaborative types.
- ✅ Thread-safe collaborative types.
- 🚧 Recursive event subscription
- 🚧 Collaborative Xml (Fragment / Element)
- ✅ Collaborative Doc Container
- ✅ YATA CRDT state apply/diff compatible with [yjs]
- ✅ Thread-safe state sync.
- ✅ Store all collaborative types and JS data types
- ✅ Update event subscription.
- 🚧 Sub Document.
- ✅ Yjs binary encoding
- ✅ Awareness encoding.
- ✅ Primitive type encoding.
- ✅ Sync Protocol encoding.
- ✅ Yjs update v1 encoding.
- 🚧 Yjs update v2 encoding.
## Testing & Linting
Put everything to the test! We've established various test suites, but we're continually striving to enhance our coverage.
- Rust Tests
- Unit tests
- [Loom](https://docs.rs/loom/latest/loom/) multi-threading tests
- [Miri](https://github.com/rust-lang/miri) undefined behavior tests
- [Address Sanitizer](https://doc.rust-lang.org/beta/unstable-book/compiler-flags/sanitizer.html) memory error detections
- [Fuzzing](https://github.com/rust-fuzz/cargo-fuzz) fuzzing tests
- Node Tests
- Smoke Tests
- Eslint, Clippy
## Related projects
- [OctoBase]: The open-source embedded database based on Y-Octo.
- [yjs]: Shared data types for building collaborative software on the web.
## Maintainers
- [DarkSky](https://github.com/darkskygit)
- [liuyi](https://github.com/forehalo)
- [LongYinan](https://github.com/Brooooooklyn)
## Why not [yrs](https://github.com/y-crdt/y-crdt/)
See [Why we're not using yrs](./y-octo-utils/yrs-is-unsafe/README.md)
## License
Y-Octo are [MIT licensed].
[codecov]: https://codecov.io/gh/toeverything/y-octo/graph/badge.svg?token=9AQY5Q1BYH
[crates]: https://img.shields.io/crates/v/y-octo.svg
[docs]: https://img.shields.io/docsrs/y-octo.svg
[test]: https://github.com/toeverything/y-octo/actions/workflows/y-octo.yml/badge.svg
[yjs]: https://github.com/yjs/yjs
[Address Sanitizer]: https://github.com/toeverything/y-octo/actions/workflows/y-octo-asan.yml/badge.svg
[Memory Leak Detect]: https://github.com/toeverything/y-octo/actions/workflows/y-octo-memory-test.yml/badge.svg
[OctoBase]: https://github.com/toeverything/octobase
[BlockSuite]: https://github.com/toeverything/blocksuite
[AFFiNE]: https://github.com/toeverything/affine
[MIT licensed]: ./LICENSE

View File

@@ -0,0 +1,34 @@
mod utils;
use std::time::Duration;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;
/// Benchmark: decode and apply a full y-octo update binary, once per fixture
/// file loaded from `src/fixtures/`.
fn apply(c: &mut Criterion) {
    let files = Files::load();
    let mut group = c.benchmark_group("apply");
    group.measurement_time(Duration::from_secs(15));

    for file in &files.files {
        // Throughput is reported relative to the raw update size in bytes.
        group.throughput(Throughput::Bytes(file.content.len() as u64));
        let id = BenchmarkId::new("apply with jwst", file.path.name_str());
        group.bench_with_input(id, &file.content, |b, content| {
            b.iter(|| {
                use y_octo::*;
                let mut doc = Doc::new();
                // NOTE(review): the `content.clone()` happens inside the timed
                // closure, so the copy is included in the measurement.
                doc.apply_update_from_binary_v1(content.clone()).unwrap()
            });
        });
    }

    group.finish();
}

criterion_group!(benches, apply);
criterion_main!(benches);

View File

@@ -0,0 +1,71 @@
use std::time::Duration;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{Rng, SeedableRng};
/// Criterion benchmarks for y-octo array insert/remove operations.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/array");
    group.measurement_time(Duration::from_secs(15));

    group.bench_function("jwst/insert", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        // Fixed seed so every run inserts at exactly the same positions.
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let idxs = (0..99)
            .map(|_| rng.random_range(0..base_text.len() as u64))
            .collect::<Vec<_>>();
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut array = doc.get_or_create_array("test").unwrap();
            // Seed the array with one single-character string per char.
            for c in base_text.chars() {
                array.push(c.to_string()).unwrap();
            }
            for idx in &idxs {
                array.insert(*idx, "test").unwrap();
            }
        });
    });

    group.bench_function("jwst/insert range", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let idxs = (0..99)
            .map(|_| rng.random_range(0..base_text.len() as u64))
            .collect::<Vec<_>>();
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut array = doc.get_or_create_array("test").unwrap();
            for c in base_text.chars() {
                array.push(c.to_string()).unwrap();
            }
            // Two adjacent inserts per position to exercise range insertion.
            for idx in &idxs {
                array.insert(*idx, "test1").unwrap();
                array.insert(idx + 1, "test2").unwrap();
            }
        });
    });

    group.bench_function("jwst/remove", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut array = doc.get_or_create_array("test").unwrap();
            for c in base_text.chars() {
                array.push(c.to_string()).unwrap();
            }
            // Remove back-to-front so earlier indices stay valid.
            for idx in (0..base_text.len() as u64).rev() {
                array.remove(idx, 1).unwrap();
            }
        });
    });

    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,91 @@
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
use y_octo::{read_var_i32, read_var_u64, write_var_i32, write_var_u64};
// Number of integers encoded/decoded per benchmark iteration.
const BENCHMARK_SIZE: u32 = 100000;

/// Criterion benchmarks for the variable-length integer codec
/// (signed 32-bit and unsigned 64-bit variants).
fn codec(c: &mut Criterion) {
    let mut codec_group = c.benchmark_group("codec");
    // Flat sampling mode — suited to iterations with uniform per-iter cost.
    codec_group.sampling_mode(SamplingMode::Flat);

    {
        codec_group.bench_function("jwst encode var_int (32 bit)", |b| {
            b.iter(|| {
                // 8 bytes per value is the worst case for this encoding.
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..(BENCHMARK_SIZE as i32) {
                    write_var_i32(&mut encoder, i).unwrap();
                }
            })
        });

        codec_group.bench_function("jwst decode var_int (32 bit)", |b| {
            // Pre-encode outside the timed loop; only decoding is measured.
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..(BENCHMARK_SIZE as i32) {
                write_var_i32(&mut encoder, i).unwrap();
            }
            b.iter(|| {
                let mut decoder = encoder.as_slice();
                for i in 0..(BENCHMARK_SIZE as i32) {
                    let (tail, num) = read_var_i32(decoder).unwrap();
                    decoder = tail;
                    assert_eq!(num, i);
                }
            })
        });
    }

    {
        codec_group.bench_function("jwst encode var_uint (32 bit)", |b| {
            b.iter(|| {
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..BENCHMARK_SIZE {
                    write_var_u64(&mut encoder, i as u64).unwrap();
                }
            })
        });

        codec_group.bench_function("jwst decode var_uint (32 bit)", |b| {
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..BENCHMARK_SIZE {
                write_var_u64(&mut encoder, i as u64).unwrap();
            }
            b.iter(|| {
                let mut decoder = encoder.as_slice();
                for i in 0..BENCHMARK_SIZE {
                    let (tail, num) = read_var_u64(decoder).unwrap();
                    decoder = tail;
                    assert_eq!(num as u32, i);
                }
            })
        });
    }

    {
        codec_group.bench_function("jwst encode var_uint (64 bit)", |b| {
            b.iter(|| {
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..(BENCHMARK_SIZE as u64) {
                    write_var_u64(&mut encoder, i).unwrap();
                }
            })
        });

        codec_group.bench_function("jwst decode var_uint (64 bit)", |b| {
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..(BENCHMARK_SIZE as u64) {
                write_var_u64(&mut encoder, i).unwrap();
            }
            b.iter(|| {
                let mut decoder = encoder.as_slice();
                for i in 0..(BENCHMARK_SIZE as u64) {
                    let (tail, num) = read_var_u64(decoder).unwrap();
                    decoder = tail;
                    assert_eq!(num, i);
                }
            })
        });
    }
}

criterion_group!(benches, codec);
criterion_main!(benches);

View File

@@ -0,0 +1,65 @@
use std::time::Duration;
use criterion::{criterion_group, criterion_main, Criterion};
/// Criterion benchmarks for y-octo map insert/get/remove operations.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/map");
    group.measurement_time(Duration::from_secs(15));

    group.bench_function("jwst/insert", |b| {
        let words = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut map = doc.get_or_create_map("test").unwrap();
            // Each word maps to its position in the list.
            for (value, key) in words.iter().enumerate() {
                map.insert(key.to_string(), value).unwrap();
            }
        });
    });

    group.bench_function("jwst/get", |b| {
        use y_octo::*;
        let words = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        // Populate once up front; only lookups are timed.
        let doc = Doc::default();
        let mut map = doc.get_or_create_map("test").unwrap();
        for (value, key) in words.iter().enumerate() {
            map.insert(key.to_string(), value).unwrap();
        }
        b.iter(|| {
            for key in &words {
                map.get(key);
            }
        });
    });

    group.bench_function("jwst/remove", |b| {
        let words = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut map = doc.get_or_create_map("test").unwrap();
            for (value, key) in words.iter().enumerate() {
                map.insert(key.to_string(), value).unwrap();
            }
            for key in &words {
                map.remove(key);
            }
        });
    });

    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,50 @@
use std::time::Duration;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{Rng, SeedableRng};
/// Criterion benchmarks for y-octo text insert/remove operations.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/text");
    group.measurement_time(Duration::from_secs(15));

    group.bench_function("jwst/insert", |b| {
        let seed_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        // Fixed seed so every run inserts at the same 99 positions.
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let positions: Vec<u64> = (0..99)
            .map(|_| rng.random_range(0..seed_text.len() as u64))
            .collect();
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut text = doc.get_or_create_text("test").unwrap();
            text.insert(0, seed_text).unwrap();
            for pos in &positions {
                text.insert(*pos, "test").unwrap();
            }
        });
    });

    group.bench_function("jwst/remove", |b| {
        let seed_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        b.iter(|| {
            use y_octo::*;
            let doc = Doc::default();
            let mut text = doc.get_or_create_text("test").unwrap();
            // Prepend the seed text three times, then delete one char at a
            // time from the end of the first copy's range.
            for _ in 0..3 {
                text.insert(0, seed_text).unwrap();
            }
            for pos in (0..seed_text.len() as u64).rev() {
                text.remove(pos, 1).unwrap();
            }
        });
    });

    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,34 @@
mod utils;
use std::time::Duration;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;
/// Benchmark: parse fixture update binaries into `Update` values
/// (decode only, no document apply).
fn update(c: &mut Criterion) {
    let files = Files::load();
    let mut group = c.benchmark_group("update");
    group.measurement_time(Duration::from_secs(15));

    for file in &files.files {
        // Throughput is reported relative to the raw update size in bytes.
        group.throughput(Throughput::Bytes(file.content.len() as u64));
        let id = BenchmarkId::new("parse with jwst", file.path.name_str());
        group.bench_with_input(id, &file.content, |b, content| {
            b.iter(|| {
                use y_octo::*;
                let mut decoder = RawDecoder::new(content);
                Update::read(&mut decoder).unwrap()
            });
        });
    }

    group.finish();
}

criterion_group!(benches, update);
criterion_main!(benches);

View File

@@ -0,0 +1,42 @@
use std::{
fs::{read, read_dir},
path::{Path, PathBuf},
};
use path_ext::PathExt;
/// A benchmark fixture file loaded fully into memory.
pub struct File {
    /// Path the fixture was read from.
    pub path: PathBuf,
    /// Raw bytes of the fixture.
    pub content: Vec<u8>,
}

// Fixture directory, relative to the crate manifest.
const BASE: &str = "src/fixtures/";

impl File {
    /// Reads the file at `path` into memory.
    ///
    /// # Panics
    /// Panics if the file cannot be read. The panic message includes the
    /// offending path and the I/O error (a bare `unwrap` previously dropped
    /// that context, making missing-fixture failures hard to diagnose).
    fn new(path: &Path) -> Self {
        let content =
            read(path).unwrap_or_else(|e| panic!("failed to read fixture {:?}: {}", path, e));
        Self {
            path: path.into(),
            content,
        }
    }
}
/// The set of binary fixture files found in the fixtures directory.
pub struct Files {
    pub files: Vec<File>,
}

impl Files {
    /// Loads every regular `*.bin` file under `src/fixtures/` in the crate
    /// root into memory. Panics if the directory cannot be listed.
    pub fn load() -> Self {
        let fixtures = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(BASE);
        let mut files = Vec::new();
        for entry in read_dir(fixtures).unwrap().flatten() {
            let path = entry.path();
            if path.is_file() && path.ext_str() == "bin" {
                files.push(File::new(&path));
            }
        }
        Self { files }
    }
}

View File

@@ -0,0 +1,3 @@
mod files;
pub use files::Files;

View File

@@ -0,0 +1,87 @@
use std::io::{Error, Write};
use nom::bytes::complete::take;
use super::*;
/// Reads a length-prefixed byte buffer: a var-u64 length followed by that
/// many raw bytes. Returns the remaining input and the buffer slice.
pub fn read_var_buffer(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let (rest, len) = read_var_u64(input)?;
    take(len as usize)(rest)
}
/// Writes `data` as a var-u64 length prefix followed by the raw bytes —
/// the inverse of `read_var_buffer`.
pub fn write_var_buffer<W: Write>(buffer: &mut W, data: &[u8]) -> Result<(), Error> {
    write_var_u64(buffer, data.len() as u64)?;
    buffer.write_all(data)
}
#[cfg(test)]
mod tests {
    use nom::{
        error::{Error, ErrorKind},
        AsBytes, Err,
    };

    use super::*;

    /// Decoding edge cases: valid buffer, truncated payload, short payload,
    /// and an over-long var-int length prefix.
    #[test]
    fn test_read_var_buffer() {
        // Test case 1: valid input, buffer length = 5
        let input = [0x05, 0x01, 0x02, 0x03, 0x04, 0x05];
        let expected_output = [0x01, 0x02, 0x03, 0x04, 0x05];
        let result = read_var_buffer(&input);
        assert_eq!(result, Ok((&[][..], &expected_output[..])));

        // Test case 2: truncated input, missing buffer
        let input = [0x05, 0x01, 0x02, 0x03];
        let result = read_var_buffer(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[1..], ErrorKind::Eof)))
        );

        // Test case 3: invalid input
        let input = [0xFF, 0x01, 0x02, 0x03];
        let result = read_var_buffer(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[2..], ErrorKind::Eof)))
        );

        // Test case 4: invalid var int encoding
        let input = [0xFF, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
        let result = read_var_buffer(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[7..], ErrorKind::Eof)))
        );
    }

    /// Round-trips fixed samples, plus random buffers (skipped under Miri).
    #[test]
    fn test_var_buf_codec() {
        test_var_buf_enc_dec(&[]);
        test_var_buf_enc_dec(&[0x01, 0x02, 0x03, 0x04, 0x05]);
        test_var_buf_enc_dec(b"test_var_buf_enc_dec");

        // Randomized round-trips are excluded from Miri runs.
        #[cfg(not(miri))]
        {
            use rand::{rng, Rng};
            let mut rng = rng();
            for _ in 0..100 {
                test_var_buf_enc_dec(&{
                    let mut bytes = vec![0u8; rng.random_range(0..u16::MAX as usize)];
                    rng.fill(&mut bytes[..]);
                    bytes
                });
            }
        }
    }

    // Helper: encode `data`, decode it back, and require an exact match
    // with no leftover input.
    fn test_var_buf_enc_dec(data: &[u8]) {
        let mut buf = Vec::<u8>::new();
        write_var_buffer(&mut buf, data).unwrap();
        let result = read_var_buffer(buf.as_bytes());
        assert_eq!(result, Ok((&[][..], data)));
    }
}

View File

@@ -0,0 +1,166 @@
use std::io::{Error, Write};
use byteorder::WriteBytesExt;
use nom::Needed;
use super::*;
/// Decodes a little-endian base-128 var-uint: each byte contributes 7 value
/// bits, and a set high bit means another byte follows.
///
/// Returns the unparsed remainder and the value, or `nom::Err::Incomplete`
/// if the input ends before a byte with a clear continuation bit.
pub fn read_var_u64(input: &[u8]) -> IResult<&[u8], u64> {
    // parse the first byte
    if let Some(next_byte) = input.first() {
        let mut shift = 7;
        let mut curr_byte = *next_byte;
        let mut rest = &input[1..];
        // same logic in loop, but enable early exit when dealing with small numbers
        let mut num = (curr_byte & 0b0111_1111) as u64;

        // high (continuation) bit set: more bytes follow
        while (curr_byte >> 7) & 0b1 != 0 {
            if let Some(next_byte) = rest.first() {
                curr_byte = *next_byte;
                // add the remaining 7 bits to the number;
                // wrapping_shl avoids a shift-overflow panic when a malformed
                // input pushes `shift` to 64 or beyond
                num |= ((curr_byte & 0b0111_1111) as u64).wrapping_shl(shift);
                shift += 7;
                rest = &rest[1..];
            } else {
                // ran out of bytes mid-number
                return Err(nom::Err::Incomplete(Needed::new(input.len() + 1)));
            }
        }

        Ok((rest, num))
    } else {
        Err(nom::Err::Incomplete(Needed::new(1)))
    }
}
/// Encodes `num` as a little-endian base-128 var-uint: 7 value bits per
/// byte, with the high bit marking that another byte follows.
pub fn write_var_u64<W: Write>(buffer: &mut W, mut num: u64) -> Result<(), Error> {
    loop {
        let low = (num & 0b0111_1111) as u8;
        num >>= 7;
        if num == 0 {
            // final byte: continuation bit clear
            buffer.write_all(&[low])?;
            return Ok(());
        }
        buffer.write_all(&[low | 0b1000_0000])?;
    }
}
/// Decodes a signed varint in sign-magnitude form: the first byte carries
/// the sign flag (bit 6) plus 6 magnitude bits, and each continuation byte
/// adds 7 more magnitude bits.
pub fn read_var_i32(input: &[u8]) -> IResult<&[u8], i32> {
    // parse the first byte
    if let Some(next_byte) = input.first() {
        // first shift is 6 because the first byte only holds 6 magnitude bits
        let mut shift = 6;
        let mut curr_byte = *next_byte;
        let mut rest: &[u8] = &input[1..];

        // get the sign bit and the first 6 bits of the number
        let sign_bit = (curr_byte >> 6) & 0b1;
        // accumulate in i64 so negation below cannot overflow i32
        let mut num = (curr_byte & 0b0011_1111) as i64;

        // high (continuation) bit set: more bytes follow
        while (curr_byte >> 7) & 0b1 != 0 {
            if let Some(next_byte) = rest.first() {
                curr_byte = *next_byte;
                // add the remaining 7 bits to the number;
                // wrapping_shl avoids a shift-overflow panic on overlong input
                num |= ((curr_byte & 0b0111_1111) as i64).wrapping_shl(shift);
                shift += 7;
                rest = &rest[1..];
            } else {
                // ran out of bytes mid-number
                return Err(nom::Err::Incomplete(Needed::new(input.len() + 1)));
            }
        }

        // negate the number if the sign bit is set
        if sign_bit == 1 {
            num = -num;
        }

        Ok((rest, num as i32))
    } else {
        Err(nom::Err::Incomplete(Needed::new(1)))
    }
}
/// Encodes `num` as a sign-magnitude varint: the first byte is
/// continuation flag (bit 7) | sign flag (bit 6) | low 6 magnitude bits;
/// each following byte carries 7 more magnitude bits, little-endian.
pub fn write_var_i32<W: Write>(buffer: &mut W, num: i32) -> Result<(), Error> {
    let negative = num < 0;
    // Widen to i64 before taking the magnitude so `i32::MIN` cannot overflow.
    let mut mag = (num as i64).unsigned_abs();

    // First byte: 6 magnitude bits plus sign and continuation flags.
    let mut first = mag as u8 & 0b0011_1111;
    if negative {
        first |= 0b0100_0000;
    }
    mag >>= 6;
    if mag > 0 {
        first |= 0b1000_0000;
    }
    buffer.write_all(&[first])?;

    // Remaining bytes: 7 magnitude bits each.
    while mag > 0 {
        let mut byte = mag as u8 & 0b0111_1111;
        mag >>= 7;
        if mag > 0 {
            byte |= 0b1000_0000;
        }
        buffer.write_all(&[byte])?;
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip helper for the unsigned codec: encode, decode, and require
    // an exact value with no leftover bytes.
    fn test_var_uint_enc_dec(num: u64) {
        let mut buf = Vec::new();
        write_var_u64(&mut buf, num).unwrap();
        let (rest, decoded_num) = read_var_u64(&buf).unwrap();
        assert_eq!(num, decoded_num);
        assert_eq!(rest.len(), 0);
    }

    // Round-trip helper for the signed codec.
    fn test_var_int_enc_dec(num: i32) {
        {
            let mut buf = Vec::new();
            write_var_i32(&mut buf, num).unwrap();
            let (rest, decoded_num) = read_var_i32(&buf).unwrap();
            assert_eq!(num, decoded_num);
            assert_eq!(rest.len(), 0);
        }
    }

    /// Values chosen around the 7-bit continuation-group boundaries, plus
    /// the u64 extremes.
    #[test]
    fn test_var_uint_codec() {
        test_var_uint_enc_dec(0);
        test_var_uint_enc_dec(1);
        test_var_uint_enc_dec(127);
        test_var_uint_enc_dec(0b1000_0000);
        test_var_uint_enc_dec(0b1_0000_0000);
        test_var_uint_enc_dec(0b1_1111_1111);
        test_var_uint_enc_dec(0b10_0000_0000);
        test_var_uint_enc_dec(0b11_1111_1111);
        test_var_uint_enc_dec(0x7fff_ffff_ffff_ffff);
        test_var_uint_enc_dec(u64::MAX);
    }

    /// Values around the 6-bit first-byte boundary (±63/±64), the i32
    /// extremes, and both signs of a multi-byte magnitude.
    #[test]
    fn test_var_int() {
        test_var_int_enc_dec(0);
        test_var_int_enc_dec(1);
        test_var_int_enc_dec(-1);
        test_var_int_enc_dec(63);
        test_var_int_enc_dec(-63);
        test_var_int_enc_dec(64);
        test_var_int_enc_dec(-64);
        test_var_int_enc_dec(i32::MAX);
        test_var_int_enc_dec(i32::MIN);
        test_var_int_enc_dec(((1 << 20) - 1) * 8);
        test_var_int_enc_dec(-((1 << 20) - 1) * 8);
    }
}

View File

@@ -0,0 +1,9 @@
mod buffer;
mod integer;
mod string;
pub use buffer::{read_var_buffer, write_var_buffer};
pub use integer::{read_var_i32, read_var_u64, write_var_i32, write_var_u64};
pub use string::{read_var_string, write_var_string};
use super::*;

View File

@@ -0,0 +1,90 @@
use std::io::{Error, Write};
use nom::{combinator::map_res, Parser};
use super::*;
/// Reads a var-length byte buffer and validates it as UTF-8.
///
/// Fails with `ErrorKind::MapRes` when the buffer is not valid UTF-8
/// (see the tests below for the exact error shapes).
pub fn read_var_string(input: &[u8]) -> IResult<&[u8], String> {
    map_res(read_var_buffer, |s| String::from_utf8(s.to_vec())).parse(input)
}
/// Writes `input` as a var-length buffer containing its UTF-8 bytes —
/// the inverse of `read_var_string`.
pub fn write_var_string<W: Write, S: AsRef<str>>(buffer: &mut W, input: S) -> Result<(), Error> {
    write_var_buffer(buffer, input.as_ref().as_bytes())
}
#[cfg(test)]
mod tests {
    use nom::{
        error::{Error, ErrorKind},
        AsBytes, Err,
    };

    use super::*;

    /// Decoding edge cases: valid string, missing/short length, bad varint
    /// length prefix, and invalid UTF-8 payload.
    #[test]
    fn test_read_var_string() {
        // Test case 1: valid input, string length = 5
        let input = [0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F];
        let expected_output = "hello".to_string();
        let result = read_var_string(&input);
        assert_eq!(result, Ok((&[][..], expected_output)));

        // Test case 2: missing string length
        let input = [0x68, 0x65, 0x6C, 0x6C, 0x6F];
        let result = read_var_string(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[1..], ErrorKind::Eof)))
        );

        // Test case 3: truncated input
        let input = [0x05, 0x68, 0x65, 0x6C, 0x6C];
        let result = read_var_string(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[1..], ErrorKind::Eof)))
        );

        // Test case 4: invalid input
        let input = [0xFF, 0x01, 0x02, 0x03, 0x04];
        let result = read_var_string(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[2..], ErrorKind::Eof)))
        );

        // Test case 5: invalid var int encoding
        let input = [0xFF, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
        let result = read_var_string(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[7..], ErrorKind::Eof)))
        );

        // Test case 6: invalid input, invalid UTF-8 encoding
        let input = [0x05, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
        let result = read_var_string(&input);
        assert_eq!(
            result,
            Err(Err::Error(Error::new(&input[..], ErrorKind::MapRes)))
        );
    }

    /// Round-trips the empty string, ASCII, and multi-byte Unicode.
    #[test]
    fn test_var_str_codec() {
        test_var_str_enc_dec("".to_string());
        test_var_str_enc_dec(" ".to_string());
        test_var_str_enc_dec("abcde".to_string());
        test_var_str_enc_dec("🃒🃓🃟☗🀥🀫∺∼≂≇⓵➎⓷➏‍".to_string());
    }

    // Helper: encode `input`, decode it back, and require an exact match
    // with no leftover bytes.
    fn test_var_str_enc_dec(input: String) {
        let mut buf = Vec::<u8>::new();
        write_var_string(&mut buf, input.clone()).unwrap();
        let (rest, decoded_str) = read_var_string(buf.as_bytes()).unwrap();
        assert_eq!(decoded_str, input);
        assert_eq!(rest.len(), 0);
    }
}

View File

@@ -0,0 +1,253 @@
use std::{cmp::max, collections::hash_map::Entry};
use super::*;
use crate::sync::Arc;
/// Observer invoked after local state changes and applied remote updates.
pub type AwarenessCallback = Arc<dyn Fn(&Awareness, AwarenessEvent) + Send + Sync + 'static>;

/// Per-client awareness registry: maps client ids to their latest state and
/// notifies an optional callback about changes.
pub struct Awareness {
    // client_id -> state, including this client's own entry
    awareness: AwarenessStates,
    // optional single observer; replaced wholesale by `on_update`
    callback: Option<AwarenessCallback>,
    // id of the client whose state this instance owns
    local_id: u64,
}
impl Awareness {
    /// Creates an empty registry owned by `local_id`.
    pub fn new(local_id: u64) -> Self {
        Self {
            awareness: AwarenessStates::new(),
            callback: None,
            local_id,
        }
    }

    /// Registers the update callback, replacing any previous one.
    pub fn on_update(&mut self, f: impl Fn(&Awareness, AwarenessEvent) + Send + Sync + 'static) {
        self.callback = Some(Arc::new(f));
    }

    /// All known client states, including the local client's.
    pub fn get_states(&self) -> &AwarenessStates {
        &self.awareness
    }

    /// Content of the local client's state, if one exists.
    pub fn get_local_state(&self) -> Option<String> {
        self
            .awareness
            .get(&self.local_id)
            .map(|state| state.content.clone())
    }

    // Mutable access to the local entry, created on demand.
    fn mut_local_state(&mut self) -> &mut AwarenessState {
        self.awareness.entry(self.local_id).or_default()
    }

    /// Sets the local state content and fires an `update` event for the
    /// local client id.
    pub fn set_local_state(&mut self, content: String) {
        self.mut_local_state().set_content(content);
        if let Some(cb) = self.callback.as_ref() {
            cb(
                self,
                AwarenessEventBuilder::new().update(self.local_id).build(),
            );
        }
    }

    /// Marks the local state deleted and fires a `remove` event for the
    /// local client id.
    pub fn clear_local_state(&mut self) {
        self.mut_local_state().delete();
        if let Some(cb) = self.callback.as_ref() {
            cb(
                self,
                AwarenessEventBuilder::new().remove(self.local_id).build(),
            );
        }
    }

    /// Merges a batch of remote states, then fires a single combined event.
    ///
    /// Per entry: a remote update about the local client is ignored, but the
    /// local clock is bumped past the remote one so local data wins on the
    /// next broadcast; a known client with a newer remote clock is replaced
    /// (`remove` if the remote state is deleted, else `update`); an unknown
    /// client is inserted and reported as `add`.
    pub fn apply_update(&mut self, update: AwarenessStates) {
        let mut event = AwarenessEventBuilder::new();

        for (client_id, state) in update {
            match self.awareness.entry(client_id) {
                Entry::Occupied(mut entry) => {
                    let prev_state = entry.get_mut();
                    if client_id == self.local_id {
                        // ignore remote update about local client and
                        // add clock to overwrite remote data
                        prev_state.set_clock(max(prev_state.clock, state.clock) + 1);
                        event.update(client_id);
                        continue;
                    }

                    // only a strictly newer clock may replace an entry
                    if prev_state.clock < state.clock {
                        if state.is_deleted() {
                            prev_state.delete();
                            event.remove(client_id);
                        } else {
                            *prev_state = state;
                            event.update(client_id);
                        }
                    }
                }
                Entry::Vacant(entry) => {
                    entry.insert(state);
                    event.add(client_id);
                }
            }
        }

        if let Some(cb) = self.callback.as_ref() {
            cb(self, event.build());
        }
    }
}
/// Client-id sets describing one batch of awareness changes.
pub struct AwarenessEvent {
    added: Vec<u64>,
    updated: Vec<u64>,
    removed: Vec<u64>,
}

impl AwarenessEvent {
    /// Returns the subset of `states` whose client id appears anywhere in
    /// this event (added, updated, or removed), cloning each state.
    pub fn get_updated(&self, states: &AwarenessStates) -> AwarenessStates {
        let touched = |id: &u64| {
            self.added.contains(id) || self.updated.contains(id) || self.removed.contains(id)
        };
        states
            .iter()
            .filter(|(id, _)| touched(id))
            .map(|(id, state)| (*id, state.clone()))
            .collect()
    }
}
/// Accumulates client ids for an `AwarenessEvent` under construction.
struct AwarenessEventBuilder {
    added: Vec<u64>,
    updated: Vec<u64>,
    removed: Vec<u64>,
}

impl AwarenessEventBuilder {
    fn new() -> Self {
        Self {
            added: vec![],
            updated: vec![],
            removed: vec![],
        }
    }

    /// Records `client_id` as newly added.
    fn add(&mut self, client_id: u64) -> &mut Self {
        self.added.push(client_id);
        self
    }

    /// Records `client_id` as updated.
    fn update(&mut self, client_id: u64) -> &mut Self {
        self.updated.push(client_id);
        self
    }

    /// Records `client_id` as removed.
    fn remove(&mut self, client_id: u64) -> &mut Self {
        self.removed.push(client_id);
        self
    }

    /// Snapshots the accumulated ids into an event; the builder remains
    /// usable afterwards (ids are cloned, not drained).
    fn build(&mut self) -> AwarenessEvent {
        AwarenessEvent {
            added: self.added.clone(),
            updated: self.updated.clone(),
            removed: self.removed.clone(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sync::{Mutex, MutexGuard};

    /// End-to-end check of local state handling, remote merge rules, and
    /// callback event contents (run under the loom model for the
    /// loom-enabled build).
    #[test]
    fn test_awareness() {
        loom_model!({
            let mut awareness = Awareness::new(0);
            {
                // init state
                assert_eq!(awareness.local_id, 0);
                assert_eq!(awareness.awareness.len(), 0);
            }

            {
                // local state
                awareness.set_local_state("test".to_string());
                assert_eq!(awareness.get_local_state(), Some("test".to_string()));

                // clearing keeps the entry but resets its content to "null"
                awareness.clear_local_state();
                assert_eq!(awareness.get_local_state(), Some("null".to_string()));
            }

            {
                // apply remote update
                let mut states = AwarenessStates::new();
                states.insert(0, AwarenessState::new(2, "test0".to_string()));
                states.insert(1, AwarenessState::new(2, "test1".to_string()));
                awareness.apply_update(states);

                assert!(awareness.get_states().contains_key(&1));
                // local state will not apply
                assert_eq!(
                    awareness.get_states().get(&0).unwrap().content,
                    "null".to_string()
                );
                assert_eq!(
                    awareness.get_states().get(&1).unwrap().content,
                    "test1".to_string()
                );
            }

            {
                // callback
                let values: Arc<Mutex<Vec<AwarenessEvent>>> = Arc::new(Mutex::new(Vec::new()));
                let callback_values = Arc::clone(&values);
                awareness.on_update(move |_, event| {
                    let mut values = callback_values.lock().unwrap();
                    values.push(event);
                });

                let mut new_states = AwarenessStates::new();
                // exists in local awareness: update
                new_states.insert(1, AwarenessState::new(3, "test update".to_string()));
                // not exists in local awareness: add
                new_states.insert(2, AwarenessState::new(1, "test update".to_string()));
                // not exists in local awareness: add
                new_states.insert(3, AwarenessState::new(1, "null".to_string()));
                // not exists in local awareness: add
                new_states.insert(4, AwarenessState::new(1, "test update".to_string()));
                awareness.apply_update(new_states);

                let mut new_states = AwarenessStates::new();
                // exists in local awareness: delete
                new_states.insert(4, AwarenessState::new(2, "null".to_string()));
                awareness.apply_update(new_states);

                // two more events from local mutations
                awareness.set_local_state("test".to_string());
                awareness.clear_local_state();

                // four events total: batch merge, remote delete, local set,
                // local clear
                let values: MutexGuard<Vec<AwarenessEvent>> = values.lock().unwrap();
                assert_eq!(values.len(), 4);

                let event = values.first().unwrap();
                // sort: map iteration order is not deterministic
                let mut added = event.added.clone();
                added.sort();
                assert_eq!(added, [2, 3, 4]);
                assert_eq!(event.updated, [1]);
                assert_eq!(
                    event.get_updated(awareness.get_states()).get(&1).unwrap(),
                    &AwarenessState::new(3, "test update".to_string())
                );

                let event = values.get(1).unwrap();
                assert_eq!(event.removed, [4]);

                let event = values.get(2).unwrap();
                assert_eq!(event.updated, [0]);

                let event = values.get(3).unwrap();
                assert_eq!(event.removed, [0]);
            }
        });
    }
}

View File

@@ -0,0 +1,716 @@
use std::{
fmt::{self, Display},
ops::RangeInclusive,
};
use ordered_float::OrderedFloat;
use super::*;
// The largest int (2^53 - 1) exactly representable in a js number.
const MAX_JS_INT: i64 = 0x001F_FFFF_FFFF_FFFF;
// The smallest int in js number.
const MIN_JS_INT: i64 = -MAX_JS_INT;
/// Inclusive range of integers that a js number can represent exactly.
pub const JS_INT_RANGE: RangeInclusive<i64> = MIN_JS_INT..=MAX_JS_INT;
/// JSON-like value variants used by the codec (see the `CrdtRead`/`CrdtWrite`
/// impls below for the on-wire tag of each variant).
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum Any {
    Undefined,
    Null,
    // 32-bit only: the yjs implementation stores flag-2 ints as 32 bit
    Integer(i32),
    // OrderedFloat so the enum can derive PartialEq
    Float32(OrderedFloat<f32>),
    Float64(OrderedFloat<f64>),
    BigInt64(i64),
    False,
    True,
    String(String),
    // FIXME: due to macro's overflow evaluating, we can't use proptest here
    #[cfg_attr(test, proptest(skip))]
    Object(HashMap<String, Any>),
    #[cfg_attr(test, proptest(skip))]
    Array(Vec<Any>),
    Binary(Vec<u8>),
}
impl<R: CrdtReader> CrdtRead<R> for Any {
    /// Reads one `Any` value. The wire tag is stored as `127 - tag`, so the
    /// tag is recovered with `127 - index`; `overflowing_sub` keeps indices
    /// above 127 from panicking — they wrap and fall through to the
    /// catch-all `Undefined` arm.
    fn read(reader: &mut R) -> JwstCodecResult<Self> {
        let index = reader.read_u8()?;
        match 127u8.overflowing_sub(index).0 {
            0 => Ok(Any::Undefined),
            1 => Ok(Any::Null),
            // in yjs implementation, flag 2 only save 32bit integer
            2 => Ok(Any::Integer(reader.read_var_i32()?)), // Integer
            3 => Ok(Any::Float32(reader.read_f32_be()?.into())), // Float32
            4 => Ok(Any::Float64(reader.read_f64_be()?.into())), // Float64
            5 => Ok(Any::BigInt64(reader.read_i64_be()?)), // BigInt64
            6 => Ok(Any::False),
            7 => Ok(Any::True),
            8 => Ok(Any::String(reader.read_var_string()?)), // String
            9 => {
                // length-prefixed list of key/value pairs
                let len = reader.read_var_u64()?;
                let object = (0..len)
                    .map(|_| Self::read_key_value(reader))
                    .collect::<Result<Vec<_>, _>>()?;
                Ok(Any::Object(object.into_iter().collect()))
            } // Object
            10 => {
                // length-prefixed list of nested values
                let len = reader.read_var_u64()?;
                let any = (0..len)
                    .map(|_| Self::read(reader))
                    .collect::<Result<Vec<_>, _>>()?;
                Ok(Any::Array(any))
            } // Array
            11 => {
                let binary = reader.read_var_buffer()?;
                Ok(Any::Binary(binary.to_vec()))
            } // Binary
            // unknown tags decode as Undefined instead of failing
            _ => Ok(Any::Undefined),
        }
    }
}
impl<W: CrdtWriter> CrdtWrite<W> for Any {
    /// Encodes the value in the lib0/yjs binary format.
    ///
    /// The leading type tag is `127 - variant_index`, mirroring `read`.
    fn write(&self, writer: &mut W) -> JwstCodecResult {
        match self {
            Any::Undefined => writer.write_u8(127)?,
            Any::Null => writer.write_u8(127 - 1)?,
            Any::Integer(value) => {
                writer.write_u8(127 - 2)?;
                writer.write_var_i32(*value)?;
            }
            Any::Float32(value) => {
                writer.write_u8(127 - 3)?;
                writer.write_f32_be(value.into_inner())?;
            }
            Any::Float64(value) => {
                writer.write_u8(127 - 4)?;
                writer.write_f64_be(value.into_inner())?;
            }
            Any::BigInt64(value) => {
                writer.write_u8(127 - 5)?;
                writer.write_i64_be(*value)?;
            }
            Any::False => writer.write_u8(127 - 6)?,
            Any::True => writer.write_u8(127 - 7)?,
            Any::String(value) => {
                writer.write_u8(127 - 8)?;
                writer.write_var_string(value)?;
            }
            Any::Object(value) => {
                writer.write_u8(127 - 9)?;
                // length-prefixed key/value pairs
                writer.write_var_u64(value.len() as u64)?;
                for (key, value) in value {
                    Self::write_key_value(writer, key, value)?;
                }
            }
            Any::Array(values) => {
                writer.write_u8(127 - 10)?;
                writer.write_var_u64(values.len() as u64)?;
                for value in values {
                    value.write(writer)?;
                }
            }
            Any::Binary(value) => {
                writer.write_u8(127 - 11)?;
                writer.write_var_buffer(value)?;
            }
        }
        Ok(())
    }
}
impl Any {
    /// Reads one `key: value` pair as used inside `Any::Object`.
    fn read_key_value<R: CrdtReader>(reader: &mut R) -> JwstCodecResult<(String, Any)> {
        let key = reader.read_var_string()?;
        let value = Self::read(reader)?;
        Ok((key, value))
    }

    /// Writes one `key: value` pair as used inside `Any::Object`.
    fn write_key_value<W: CrdtWriter>(writer: &mut W, key: &str, value: &Any) -> JwstCodecResult {
        writer.write_var_string(key)?;
        value.write(writer)?;
        Ok(())
    }

    /// Reads a length-prefixed sequence of `Any` values.
    pub(crate) fn read_multiple<R: CrdtReader>(reader: &mut R) -> JwstCodecResult<Vec<Any>> {
        let len = reader.read_var_u64()? as usize;
        // The length prefix comes from untrusted input: cap the initial
        // allocation so a corrupted/malicious prefix cannot force a huge
        // pre-allocation before any element is actually decoded. Same
        // precaution as the HASHMAP_SAFE_CAPACITY cap used when decoding maps.
        let mut vec = Vec::with_capacity(len.min(1024));
        for _ in 0..len {
            vec.push(Any::read(reader)?);
        }
        Ok(vec)
    }

    /// Writes a length-prefixed sequence of `Any` values.
    pub(crate) fn write_multiple<W: CrdtWriter>(writer: &mut W, any: &[Any]) -> JwstCodecResult {
        writer.write_var_u64(any.len() as u64)?;
        for value in any {
            value.write(writer)?;
        }
        Ok(())
    }
}
/// Implements `From<$ty> for Any`, mirroring how yjs encodes numbers:
/// integers that fit losslessly in an `i32` use `Integer`, integers exactly
/// representable as `f32`/`f64` use the float variants, and anything outside
/// the JS safe-integer range falls back to `BigInt64`.
macro_rules! impl_primitive_from {
    (unsigned, $($ty: ty),*) => {
        $(
            impl From<$ty> for Any {
                fn from(value: $ty) -> Self {
                    // INFO: i64::MAX > value > u64::MAX will cut down
                    // yjs binary does not consider the case that the int size exceeds i64
                    let int: i64 = value as i64;
                    // handle the behavior same as yjs
                    if JS_INT_RANGE.contains(&int) {
                        // both bounds must be checked: a u64 above i64::MAX wraps
                        // negative above, and `as i32` would silently truncate any
                        // value outside [i32::MIN, i32::MAX]
                        if (i32::MIN as i64..=i32::MAX as i64).contains(&int) {
                            Self::Integer(int as i32)
                        } else if int as f32 as i64 == int {
                            Self::Float32((int as f32).into())
                        } else {
                            Self::Float64((int as f64).into())
                        }
                    } else {
                        Self::BigInt64(int)
                    }
                }
            }
        )*
    };
    (signed, $($ty: ty),*) => {
        $(
            impl From<$ty> for Any {
                fn from(value: $ty) -> Self {
                    let int: i64 = value.into();
                    // handle the behavior same as yjs
                    if JS_INT_RANGE.contains(&int) {
                        // checking only the upper bound would let values below
                        // i32::MIN (e.g. -3_000_000_000i64) be truncated by `as i32`
                        if (i32::MIN as i64..=i32::MAX as i64).contains(&int) {
                            Self::Integer(int as i32)
                        } else if int as f32 as i64 == int {
                            Self::Float32((int as f32).into())
                        } else {
                            Self::Float64((int as f64).into())
                        }
                    } else {
                        Self::BigInt64(int)
                    }
                }
            }
        )*
    };
    (string, $($ty: ty),*) => {
        $(
            impl From<$ty> for Any {
                fn from(value: $ty) -> Self {
                    Self::String(value.into())
                }
            }
        )*
    };
}

impl_primitive_from!(unsigned, u8, u16, u32, u64);
impl_primitive_from!(signed, i8, i16, i32, i64);
impl_primitive_from!(string, String, &str);
impl From<usize> for Any {
fn from(value: usize) -> Self {
(value as u64).into()
}
}
impl From<isize> for Any {
fn from(value: isize) -> Self {
(value as i64).into()
}
}
impl From<f32> for Any {
    fn from(value: f32) -> Self {
        Self::Float32(value.into())
    }
}

impl From<f64> for Any {
    /// Converts an `f64`, preferring the most compact lossless variant:
    /// whole numbers are routed through the integer conversion rules and
    /// floats that survive an `f32` round-trip are stored as `Float32`.
    fn from(value: f64) -> Self {
        if value.trunc() == value {
            // whole number: reuse the `From<i64>` rules
            // NOTE(review): for |value| >= 2^63 the `as i64` cast saturates to
            // i64::MAX/MIN — confirm this matches the intended yjs behavior.
            (value as i64).into()
        } else if value as f32 as f64 == value {
            Self::Float32((value as f32).into())
        } else {
            Self::Float64(value.into())
        }
    }
}
impl From<bool> for Any {
    /// Maps `true`/`false` onto the dedicated boolean variants.
    fn from(value: bool) -> Self {
        match value {
            true => Self::True,
            false => Self::False,
        }
    }
}
impl TryFrom<Any> for String {
    type Error = JwstCodecError;

    /// Extracts the inner string, failing for any other variant.
    fn try_from(value: Any) -> Result<Self, Self::Error> {
        if let Any::String(s) = value {
            Ok(s)
        } else {
            Err(JwstCodecError::UnexpectedType("String"))
        }
    }
}

impl TryFrom<Any> for HashMap<String, Any> {
    type Error = JwstCodecError;

    /// Extracts the inner object map, failing for any other variant.
    fn try_from(value: Any) -> Result<Self, Self::Error> {
        if let Any::Object(map) = value {
            Ok(map)
        } else {
            Err(JwstCodecError::UnexpectedType("Object"))
        }
    }
}

impl TryFrom<Any> for Vec<Any> {
    type Error = JwstCodecError;

    /// Extracts the inner array, failing for any other variant.
    fn try_from(value: Any) -> Result<Self, Self::Error> {
        if let Any::Array(vec) = value {
            Ok(vec)
        } else {
            Err(JwstCodecError::UnexpectedType("Array"))
        }
    }
}

impl TryFrom<Any> for bool {
    type Error = JwstCodecError;

    /// Extracts a boolean from the `True`/`False` variants.
    fn try_from(value: Any) -> Result<Self, Self::Error> {
        if matches!(value, Any::True) {
            Ok(true)
        } else if matches!(value, Any::False) {
            Ok(false)
        } else {
            Err(JwstCodecError::UnexpectedType("Boolean"))
        }
    }
}
impl FromIterator<Any> for Any {
fn from_iter<I: IntoIterator<Item = Any>>(iter: I) -> Self {
Self::Array(iter.into_iter().collect())
}
}
impl<'a> FromIterator<&'a Any> for Any {
fn from_iter<I: IntoIterator<Item = &'a Any>>(iter: I) -> Self {
Self::Array(iter.into_iter().cloned().collect())
}
}
impl FromIterator<(String, Any)> for Any {
fn from_iter<I: IntoIterator<Item = (String, Any)>>(iter: I) -> Self {
let mut map = HashMap::new();
map.extend(iter);
Self::Object(map)
}
}
impl From<HashMap<String, Any>> for Any {
    /// Wraps an owned map as `Any::Object`.
    fn from(map: HashMap<String, Any>) -> Self {
        Self::Object(map)
    }
}

impl From<Vec<u8>> for Any {
    /// Wraps owned bytes as `Any::Binary`.
    fn from(bytes: Vec<u8>) -> Self {
        Self::Binary(bytes)
    }
}

impl From<&[u8]> for Any {
    /// Copies a byte slice into `Any::Binary`.
    fn from(bytes: &[u8]) -> Self {
        Self::Binary(bytes.to_vec())
    }
}
// TODO: impl for Any::Undefined
impl<T: Into<Any>> From<Option<T>> for Any {
    /// `None` maps to `Any::Null`; `Some` converts the inner value.
    fn from(value: Option<T>) -> Self {
        match value {
            Some(val) => val.into(),
            None => Any::Null,
        }
    }
}
#[cfg(feature = "serde_json")]
impl From<serde_json::Value> for Any {
    /// Converts a `serde_json::Value` into `Any`.
    ///
    /// Integer numbers are routed through the crate's `From<i64>`/`From<u64>`
    /// impls so values outside the `i32` range become `Float32`/`Float64`/
    /// `BigInt64` instead of being silently truncated by an `as i32` cast.
    fn from(value: serde_json::Value) -> Self {
        match value {
            serde_json::Value::Null => Self::Null,
            serde_json::Value::Bool(b) => {
                if b {
                    Self::True
                } else {
                    Self::False
                }
            }
            serde_json::Value::Number(n) => {
                if n.is_f64() {
                    Self::Float64(n.as_f64().unwrap().into())
                } else if n.is_i64() {
                    // picks Integer/Float32/Float64/BigInt64 as appropriate
                    n.as_i64().unwrap().into()
                } else {
                    // u64 numbers that don't fit in i64 land here too
                    n.as_u64().unwrap().into()
                }
            }
            serde_json::Value::String(s) => Self::String(s),
            serde_json::Value::Array(vec) => {
                Self::Array(vec.into_iter().map(|v| v.into()).collect::<Vec<_>>())
            }
            serde_json::Value::Object(obj) => {
                Self::Object(obj.into_iter().map(|(k, v)| (k, v.into())).collect())
            }
        }
    }
}
impl<'de> serde::Deserialize<'de> for Any {
    /// Deserializes any self-describing value (e.g. JSON) into [`Any`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, MapAccess, SeqAccess, Visitor};

        struct ValueVisitor;

        impl<'de> Visitor<'de> for ValueVisitor {
            type Value = Any;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("any valid JSON value")
            }

            #[inline]
            fn visit_bool<E>(self, value: bool) -> Result<Any, E> {
                Ok(if value { Any::True } else { Any::False })
            }

            #[inline]
            fn visit_i64<E>(self, value: i64) -> Result<Any, E> {
                // route through `From<i64>` so in-range values use the compact
                // Integer/float variants — consistent with `visit_u64` below
                Ok(value.into())
            }

            #[inline]
            fn visit_u64<E>(self, value: u64) -> Result<Any, E> {
                // `value as i64` would wrap for values above i64::MAX;
                // represent those out-of-range values as floats instead
                match i64::try_from(value) {
                    Ok(signed) => Ok(signed.into()),
                    Err(_) => Ok(Any::Float64(OrderedFloat(value as f64))),
                }
            }

            #[inline]
            fn visit_f64<E>(self, value: f64) -> Result<Any, E> {
                Ok(Any::Float64(OrderedFloat(value)))
            }

            #[inline]
            fn visit_str<E>(self, value: &str) -> Result<Any, E>
            where
                E: Error,
            {
                self.visit_string(String::from(value))
            }

            #[inline]
            fn visit_string<E>(self, value: String) -> Result<Any, E> {
                Ok(Any::String(value))
            }

            #[inline]
            fn visit_none<E>(self) -> Result<Any, E> {
                Ok(Any::Null)
            }

            #[inline]
            fn visit_some<D>(self, deserializer: D) -> Result<Any, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                serde::Deserialize::deserialize(deserializer)
            }

            #[inline]
            fn visit_unit<E>(self) -> Result<Any, E> {
                Ok(Any::Null)
            }

            #[inline]
            fn visit_seq<V>(self, mut visitor: V) -> Result<Any, V::Error>
            where
                V: SeqAccess<'de>,
            {
                let mut vec = Vec::new();
                while let Some(elem) = visitor.next_element()? {
                    vec.push(elem);
                }
                Ok(Any::Array(vec))
            }

            fn visit_map<V>(self, mut visitor: V) -> Result<Any, V::Error>
            where
                V: MapAccess<'de>,
            {
                match visitor.next_key::<String>()? {
                    Some(k) => {
                        let mut values = HashMap::new();
                        values.insert(k, visitor.next_value()?);
                        while let Some((key, value)) = visitor.next_entry()? {
                            values.insert(key, value);
                        }
                        Ok(Any::Object(values))
                    }
                    None => Ok(Any::Object(HashMap::new())),
                }
            }
        }

        deserializer.deserialize_any(ValueVisitor)
    }
}
impl serde::Serialize for Any {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::ser::{SerializeMap, SerializeSeq};
match self {
Any::Null => serializer.serialize_none(),
Any::Undefined => serializer.serialize_none(),
Any::True => serializer.serialize_bool(true),
Any::False => serializer.serialize_bool(false),
Any::Float32(value) => serializer.serialize_f32(value.0),
Any::Float64(value) => serializer.serialize_f64(value.0),
Any::Integer(value) => serializer.serialize_i32(*value),
Any::BigInt64(value) => serializer.serialize_i64(*value),
Any::String(value) => serializer.serialize_str(value.as_ref()),
Any::Array(values) => {
let mut seq = serializer.serialize_seq(Some(values.len()))?;
for value in values.iter() {
seq.serialize_element(value)?;
}
seq.end()
}
Any::Object(entries) => {
let mut map = serializer.serialize_map(Some(entries.len()))?;
for (key, value) in entries.iter() {
map.serialize_entry(key, value)?;
}
map.end()
}
Any::Binary(buf) => serializer.serialize_bytes(buf),
}
}
}
impl Display for Any {
    /// Renders a JSON-like human-readable representation (not valid JSON:
    /// object keys are unquoted and `undefined` has no JSON counterpart).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Undefined => f.write_str("undefined"),
            Self::Null => f.write_str("null"),
            Self::True => f.write_str("true"),
            Self::False => f.write_str("false"),
            Self::Integer(num) => write!(f, "{}", num),
            Self::Float32(num) => write!(f, "{}", num),
            Self::Float64(num) => write!(f, "{}", num),
            Self::BigInt64(num) => write!(f, "{}", num),
            Self::String(s) => write!(f, "\"{}\"", s),
            Self::Binary(bytes) => write!(f, "{:?}", bytes),
            Self::Object(fields) => {
                f.write_str("{")?;
                let mut first = true;
                for (key, value) in fields {
                    if !first {
                        f.write_str(", ")?;
                    }
                    first = false;
                    write!(f, "{}: {}", key, value)?;
                }
                f.write_str("}")
            }
            Self::Array(items) => {
                f.write_str("[")?;
                let mut first = true;
                for item in items {
                    if !first {
                        f.write_str(", ")?;
                    }
                    first = false;
                    write!(f, "{}", item)?;
                }
                f.write_str("]")
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use proptest::{collection::vec, prelude::*};

    use super::*;

    // round-trips a deeply nested value through the binary encoder/decoder
    #[test]
    fn test_any_codec() {
        let any = Any::Object(
            vec![
                ("name".to_string(), Any::String("Alice".to_string())),
                ("age".to_string(), Any::Integer(25)),
                (
                    "contacts".to_string(),
                    Any::Array(vec![
                        Any::Object(
                            vec![
                                ("type".to_string(), Any::String("Mobile".to_string())),
                                ("number".to_string(), Any::String("1234567890".to_string())),
                            ]
                            .into_iter()
                            .collect(),
                        ),
                        Any::Object(
                            vec![
                                ("type".to_string(), Any::String("Email".to_string())),
                                (
                                    "address".to_string(),
                                    Any::String("alice@example.com".to_string()),
                                ),
                            ]
                            .into_iter()
                            .collect(),
                        ),
                        Any::Undefined,
                    ]),
                ),
                (
                    // one value of every variant, to cover all codec branches
                    "standard_data".to_string(),
                    Any::Array(vec![
                        Any::Undefined,
                        Any::Null,
                        Any::Integer(114514),
                        Any::Float32(114.514.into()),
                        Any::Float64(115.514.into()),
                        Any::BigInt64(-1145141919810),
                        Any::False,
                        Any::True,
                        Any::Object(
                            vec![
                                ("name".to_string(), Any::String("tadokoro".to_string())),
                                ("age".to_string(), Any::String("24".to_string())),
                                ("profession".to_string(), Any::String("student".to_string())),
                            ]
                            .into_iter()
                            .collect(),
                        ),
                        Any::Binary(vec![1, 2, 3, 4, 5]),
                    ]),
                ),
            ]
            .into_iter()
            .collect(),
        );
        let mut encoder = RawEncoder::default();
        any.write(&mut encoder).unwrap();
        let encoded = encoder.into_inner();
        let mut decoder = RawDecoder::new(&encoded);
        let decoded = Any::read(&mut decoder).unwrap();
        assert_eq!(any, decoded);
    }

    proptest! {
        // property test: every generated `Any` survives an encode/decode round-trip
        #[test]
        #[cfg_attr(miri, ignore)]
        fn test_random_any(any in vec(any::<Any>(), 0..100)) {
            for any in &any {
                let mut encoder = RawEncoder::default();
                any.write(&mut encoder).unwrap();
                let encoded = encoder.into_inner();
                let mut decoder = RawDecoder::new(&encoded);
                let decoded = Any::read(&mut decoder).unwrap();
                assert_eq!(any, &decoded);
            }
        }
    }

    // checks each `From`/`FromIterator` conversion yields the expected variant
    #[test]
    fn test_convert_to_any() {
        let any: Vec<Any> = vec![
            42u8.into(),
            42u16.into(),
            42u32.into(),
            42u64.into(),
            114.514f32.into(),
            1919.810f64.into(),
            (-42i8).into(),
            (-42i16).into(),
            (-42i32).into(),
            (-42i64).into(),
            false.into(),
            true.into(),
            "JWST".to_string().into(),
            "OctoBase".into(),
            vec![1u8, 9, 1, 9].into(),
            (&[8u8, 1, 0][..]).into(),
            [Any::True, 42u8.into()].iter().collect(),
        ];
        assert_eq!(
            any,
            vec![
                Any::Integer(42),
                Any::Integer(42),
                Any::Integer(42),
                Any::Integer(42),
                Any::Float32(114.514.into()),
                Any::Float64(1919.810.into()),
                Any::Integer(-42),
                Any::Integer(-42),
                Any::Integer(-42),
                Any::Integer(-42),
                Any::False,
                Any::True,
                Any::String("JWST".to_string()),
                Any::String("OctoBase".to_string()),
                Any::Binary(vec![1, 9, 1, 9]),
                Any::Binary(vec![8, 1, 0]),
                Any::Array(vec![Any::True, Any::Integer(42)])
            ]
        );
        assert_eq!(
            vec![("key".to_string(), 10u64.into())]
                .into_iter()
                .collect::<Any>(),
            Any::Object(HashMap::from_iter(vec![(
                "key".to_string(),
                Any::Integer(10)
            )]))
        );
        let any: Any = 10u64.into();
        assert_eq!(
            [any].iter().collect::<Any>(),
            Any::Array(vec![Any::Integer(10)])
        );
    }
}

View File

@@ -0,0 +1,417 @@
use super::*;
/// Payload of a single CRDT struct item; wire-format tags are produced by
/// `Content::get_info` and decoded by `Content::read`.
#[derive(Clone)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub(crate) enum Content {
    // number of deleted clock ticks this tombstone covers
    Deleted(u64),
    // JSON-ish strings; `None` round-trips as the literal "undefined"
    Json(Vec<Option<String>>),
    Binary(Vec<u8>),
    String(String),
    // an embedded value, stored as serialized JSON on the wire
    #[cfg_attr(test, proptest(skip))]
    Embed(Any),
    // a key/value formatting attribute
    #[cfg_attr(test, proptest(skip))]
    Format {
        key: String,
        value: Any,
    },
    // a nested shared y-type
    #[cfg_attr(test, proptest(skip))]
    Type(YTypeRef),
    Any(Vec<Any>),
    // reference to a sub-document, identified by guid
    Doc {
        guid: String,
        opts: Any,
    },
}
// SAFETY: NOTE(review): these impls assert that `Content` — including the
// `YTypeRef` inside `Content::Type` — may be moved and shared across threads.
// That soundness is not demonstrated in this file; confirm that `YTypeRef`
// guards its interior with thread-safe primitives.
unsafe impl Send for Content {}
unsafe impl Sync for Content {}
impl From<Any> for Content {
    /// Wraps an `Any` value as item content: arrays and binaries map onto
    /// their dedicated variants, every other value becomes a one-element
    /// `Content::Any` list.
    fn from(value: Any) -> Self {
        match value {
            Any::Array(values) => Content::Any(values),
            Any::Binary(bytes) => Content::Binary(bytes),
            Any::Undefined
            | Any::Null
            | Any::Integer(_)
            | Any::Float32(_)
            | Any::Float64(_)
            | Any::BigInt64(_)
            | Any::False
            | Any::True
            | Any::String(_)
            | Any::Object(_) => Content::Any(vec![value]),
        }
    }
}
impl PartialEq for Content {
    /// Structural equality. Notable special cases:
    /// - `Doc` entries compare by `guid` only; `opts` is ignored
    /// - mismatched variants are never equal (final `_ => false` arm)
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Deleted(len1), Self::Deleted(len2)) => len1 == len2,
            (Self::Json(vec1), Self::Json(vec2)) => vec1 == vec2,
            (Self::Binary(vec1), Self::Binary(vec2)) => vec1 == vec2,
            (Self::String(str1), Self::String(str2)) => str1 == str2,
            (Self::Embed(json1), Self::Embed(json2)) => json1 == json2,
            (
                Self::Format {
                    key: key1,
                    value: value1,
                },
                Self::Format {
                    key: key2,
                    value: value2,
                },
            ) => key1 == key2 && value1 == value2,
            (Self::Any(any1), Self::Any(any2)) => any1 == any2,
            (Self::Doc { guid: guid1, .. }, Self::Doc { guid: guid2, .. }) => guid1 == guid2,
            (Self::Type(ty1), Self::Type(ty2)) => ty1 == ty2,
            _ => false,
        }
    }
}
impl std::fmt::Debug for Content {
    /// Manual impl so bulky payloads (JSON lists, binaries) print only their
    /// length rather than their full contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Deleted(arg0) => f.debug_tuple("Deleted").field(arg0).finish(),
            Self::Json(arg0) => f
                .debug_tuple("JSON")
                .field(&format!("Vec [len: {}]", arg0.len()))
                .finish(),
            Self::Binary(arg0) => f
                .debug_tuple("Binary")
                .field(&format!("Binary [len: {}]", arg0.len()))
                .finish(),
            Self::String(arg0) => f.debug_tuple("String").field(arg0).finish(),
            Self::Embed(arg0) => f.debug_tuple("Embed").field(arg0).finish(),
            Self::Format { key, value } => f
                .debug_struct("Format")
                .field("key", key)
                .field("value", value)
                .finish(),
            Self::Type(arg0) => f
                .debug_tuple("Type")
                // NOTE(review): `unwrap` panics if the type ref cannot be
                // resolved — confirm `ty()` is infallible for live refs.
                .field(&arg0.ty().unwrap().kind())
                .finish(),
            Self::Any(arg0) => f.debug_tuple("Any").field(arg0).finish(),
            Self::Doc { guid, opts } => f
                .debug_struct("Doc")
                .field("guid", guid)
                .field("opts", opts)
                .finish(),
        }
    }
}
impl Content {
    /// Decodes the content payload for struct tag `tag_type`
    /// (previously produced by `get_info`).
    pub(crate) fn read<R: CrdtReader>(decoder: &mut R, tag_type: u8) -> JwstCodecResult<Self> {
        match tag_type {
            1 => Ok(Self::Deleted(decoder.read_var_u64()?)), // Deleted
            2 => {
                let len = decoder.read_var_u64()?;
                let strings = (0..len)
                    .map(|_| {
                        decoder
                            .read_var_string()
                            // the literal "undefined" round-trips as `None`
                            .map(|s| (s != "undefined").then_some(s))
                    })
                    .collect::<Result<Vec<_>, _>>()?;
                Ok(Self::Json(strings))
            } // JSON
            3 => Ok(Self::Binary(decoder.read_var_buffer()?.to_vec())), // Binary
            4 => Ok(Self::String(decoder.read_var_string()?)), // String
            5 => {
                // embeds are stored as serialized JSON strings
                let string = decoder.read_var_string()?;
                let json =
                    serde_json::from_str(&string).map_err(|_| JwstCodecError::DamagedDocumentJson)?;
                Ok(Self::Embed(json))
            } // Embed
            6 => {
                let key = decoder.read_var_string()?;
                let value = decoder.read_var_string()?;
                let value =
                    serde_json::from_str(&value).map_err(|_| JwstCodecError::DamagedDocumentJson)?;
                Ok(Self::Format { key, value })
            } // Format
            7 => {
                let type_ref = decoder.read_var_u64()?;
                let kind = YTypeKind::from(type_ref);
                // only XML element/hook types carry an extra tag name
                let tag_name = match kind {
                    YTypeKind::XMLElement | YTypeKind::XMLHook => Some(decoder.read_var_string()?),
                    YTypeKind::Unknown => {
                        return Err(JwstCodecError::IncompleteDocument(format!(
                            "Unknown y type: {type_ref}"
                        )));
                    }
                    _ => None,
                };
                Ok(Self::Type(YTypeRef::new(kind, tag_name)))
            } // YType
            8 => Ok(Self::Any(Any::read_multiple(decoder)?)), // Any
            9 => {
                let guid = decoder.read_var_string()?;
                let opts = Any::read(decoder)?;
                Ok(Self::Doc { guid, opts })
            } // Doc
            tag_type => Err(JwstCodecError::IncompleteDocument(format!(
                "Unknown content type: {tag_type}"
            ))),
        }
    }

    /// Returns the wire-format type tag for this variant (inverse of `read`).
    pub(crate) fn get_info(&self) -> u8 {
        match self {
            Self::Deleted(_) => 1,
            Self::Json(_) => 2,
            Self::Binary(_) => 3,
            Self::String(_) => 4,
            Self::Embed(_) => 5,
            Self::Format { .. } => 6,
            Self::Type(_) => 7,
            Self::Any(_) => 8,
            Self::Doc { .. } => 9,
        }
    }

    /// Encodes only the payload; the tag from `get_info` is written by the caller.
    pub(crate) fn write<W: CrdtWriter>(&self, encoder: &mut W) -> JwstCodecResult {
        match self {
            Self::Deleted(len) => {
                encoder.write_var_u64(*len)?;
            }
            Self::Json(strings) => {
                encoder.write_var_u64(strings.len() as u64)?;
                for string in strings {
                    match string {
                        Some(string) => encoder.write_var_string(string)?,
                        // `None` is encoded as the literal "undefined" (see `read`)
                        None => encoder.write_var_string("undefined")?,
                    }
                }
            }
            Self::Binary(buffer) => {
                encoder.write_var_buffer(buffer)?;
            }
            Self::String(string) => {
                encoder.write_var_string(string)?;
            }
            Self::Embed(val) => {
                encoder.write_var_string(
                    serde_json::to_string(val).map_err(|_| JwstCodecError::DamagedDocumentJson)?,
                )?;
            }
            Self::Format { key, value } => {
                encoder.write_var_string(key)?;
                encoder.write_var_string(
                    serde_json::to_string(value).map_err(|_| JwstCodecError::DamagedDocumentJson)?,
                )?;
            }
            Self::Type(ty) => {
                // NOTE(review): if `ty()` returns `None` nothing is emitted here,
                // even though the caller already wrote the type tag — confirm
                // this case is unreachable for encodable documents.
                if let Some(ty) = ty.ty() {
                    let type_ref = u64::from(ty.kind());
                    encoder.write_var_u64(type_ref)?;
                    // tag name is only present for XML element/hook types
                    if matches!(ty.kind(), YTypeKind::XMLElement | YTypeKind::XMLHook) {
                        encoder.write_var_string(ty.name.as_ref().unwrap())?;
                    }
                }
            }
            Self::Any(any) => {
                Any::write_multiple(encoder, any)?;
            }
            Self::Doc { guid, opts } => {
                encoder.write_var_string(guid)?;
                opts.write(encoder)?;
            }
        }
        Ok(())
    }

    /// Number of clock ticks this content occupies: element count for list-like
    /// payloads, UTF-16 code units for strings, otherwise 1.
    pub fn clock_len(&self) -> u64 {
        match self {
            Self::Deleted(len) => *len,
            Self::Json(strings) => strings.len() as u64,
            // TODO: need a custom wrapper with length cached, this cost too much
            Self::String(string) => string.chars().map(|c| c.len_utf16()).sum::<usize>() as u64,
            Self::Any(any) => any.len() as u64,
            Self::Binary(_) | Self::Embed(_) | Self::Format { .. } | Self::Type(_) | Self::Doc { .. } => {
                1
            }
        }
    }

    /// Whether this content contributes to the visible length of its parent.
    pub fn countable(&self) -> bool {
        !matches!(self, Content::Format { .. } | Content::Deleted(_))
    }

    /// Whether `split` is supported for this variant.
    #[allow(dead_code)]
    pub fn splittable(&self) -> bool {
        matches!(
            self,
            Self::String { .. } | Self::Any { .. } | Self::Json { .. }
        )
    }

    /// Splits the content at `diff` (in `clock_len` units) into left/right
    /// halves; errors for variants that are not splittable.
    pub fn split(&self, diff: u64) -> JwstCodecResult<(Self, Self)> {
        match self {
            Self::String(str) => {
                // `diff` counts UTF-16 code units, not bytes
                let (left, right) = Self::split_as_utf16_str(str.as_str(), diff);
                Ok((
                    Self::String(left.to_string()),
                    Self::String(right.to_string()),
                ))
            }
            Self::Json(vec) => {
                let (left, right) = vec.split_at(diff as usize);
                Ok((Self::Json(left.to_owned()), Self::Json(right.to_owned())))
            }
            Self::Any(vec) => {
                let (left, right) = vec.split_at(diff as usize);
                Ok((Self::Any(left.to_owned()), Self::Any(right.to_owned())))
            }
            Self::Deleted(len) => {
                let (left, right) = (diff, *len - diff);
                Ok((Self::Deleted(left), Self::Deleted(right)))
            }
            _ => Err(JwstCodecError::ContentSplitNotSupport(diff)),
        }
    }

    /// consider `offset` as a utf-16 encoded string offset
    ///
    /// NOTE(review): an `offset` that lands inside a surrogate pair cannot map
    /// to a char boundary; the whole pair ends up on the left side. An offset
    /// of 0 also keeps the first char on the left — confirm callers always
    /// pass 0 < offset < clock_len.
    fn split_as_utf16_str(s: &str, offset: u64) -> (&str, &str) {
        let mut utf_16_offset = 0;
        let mut utf_8_offset = 0;
        for ch in s.chars() {
            utf_16_offset += ch.len_utf16();
            utf_8_offset += ch.len_utf8();
            if utf_16_offset as u64 >= offset {
                break;
            }
        }
        // the byte offset is always on a char boundary, so this cannot panic
        s.split_at(utf_8_offset)
    }
}
#[cfg(test)]
mod tests {
    use proptest::{collection::vec, prelude::*};

    use super::*;

    // encodes `content` (tag byte + payload) and asserts decoding restores it
    fn content_round_trip(content: &Content) -> JwstCodecResult {
        let mut writer = RawEncoder::default();
        writer.write_u8(content.get_info())?;
        content.write(&mut writer)?;
        let update = writer.into_inner();
        let mut reader = RawDecoder::new(&update);
        let tag_type = reader.read_u8()?;
        assert_eq!(Content::read(&mut reader, tag_type)?, *content);
        Ok(())
    }

    // round-trips one representative value of every content variant
    #[test]
    fn test_content() {
        loom_model!({
            let contents = [
                Content::Deleted(42),
                Content::Json(vec![
                    None,
                    Some("test_1".to_string()),
                    Some("test_2".to_string()),
                ]),
                Content::Binary(vec![1, 2, 3]),
                Content::String("hello".to_string()),
                Content::Embed(Any::True),
                Content::Format {
                    key: "key".to_string(),
                    value: Any::Integer(42),
                },
                Content::Type(YTypeRef::new(YTypeKind::Array, None)),
                Content::Type(YTypeRef::new(YTypeKind::Map, None)),
                Content::Type(YTypeRef::new(YTypeKind::Text, None)),
                Content::Type(YTypeRef::new(
                    YTypeKind::XMLElement,
                    Some("test".to_string()),
                )),
                Content::Type(YTypeRef::new(YTypeKind::XMLFragment, None)),
                Content::Type(YTypeRef::new(YTypeKind::XMLHook, Some("test".to_string()))),
                Content::Type(YTypeRef::new(YTypeKind::XMLText, None)),
                Content::Any(vec![Any::BigInt64(42), Any::String("Test Any".to_string())]),
                Content::Doc {
                    guid: "my_guid".to_string(),
                    opts: Any::BigInt64(42),
                },
            ];
            for content in &contents {
                content_round_trip(content).unwrap();
            }
        });
    }

    // checks split() halves and the ContentSplitNotSupport error path
    #[test]
    fn test_content_split() {
        let contents = [
            Content::String("hello".to_string()),
            Content::Json(vec![
                None,
                Some("test_1".to_string()),
                Some("test_2".to_string()),
            ]),
            Content::Any(vec![Any::BigInt64(42), Any::String("Test Any".to_string())]),
            Content::Binary(vec![]),
        ];
        {
            let (left, right) = contents[0].split(1).unwrap();
            assert!(contents[0].splittable());
            assert_eq!(left, Content::String("h".to_string()));
            assert_eq!(right, Content::String("ello".to_string()));
        }
        {
            let (left, right) = contents[1].split(1).unwrap();
            assert!(contents[1].splittable());
            assert_eq!(left, Content::Json(vec![None]));
            assert_eq!(
                right,
                Content::Json(vec![Some("test_1".to_string()), Some("test_2".to_string())])
            );
        }
        {
            let (left, right) = contents[2].split(1).unwrap();
            assert!(contents[2].splittable());
            assert_eq!(left, Content::Any(vec![Any::BigInt64(42)]));
            assert_eq!(
                right,
                Content::Any(vec![Any::String("Test Any".to_string())])
            );
        }
        {
            assert!(!contents[3].splittable());
            assert_eq!(
                contents[3].split(2),
                Err(JwstCodecError::ContentSplitNotSupport(2))
            );
        }
    }

    proptest! {
        // property test: arbitrary contents survive an encode/decode round-trip
        #[test]
        #[cfg_attr(miri, ignore)]
        fn test_random_content(contents in vec(any::<Content>(), 0..10)) {
            for content in &contents {
                content_round_trip(content).unwrap();
            }
        }
    }
}

View File

@@ -0,0 +1,233 @@
use std::{
collections::{hash_map::Entry, VecDeque},
ops::{Deref, DerefMut, Range},
};
use super::*;
use crate::doc::OrderRange;
impl<R: CrdtReader> CrdtRead<R> for Range<u64> {
    /// A range is encoded as its start clock followed by its length.
    fn read(decoder: &mut R) -> JwstCodecResult<Self> {
        let start = decoder.read_var_u64()?;
        let len = decoder.read_var_u64()?;
        Ok(start..start + len)
    }
}

impl<W: CrdtWriter> CrdtWrite<W> for Range<u64> {
    /// Mirrors `read`: emits the start clock, then the range length.
    fn write(&self, encoder: &mut W) -> JwstCodecResult {
        let len = self.end - self.start;
        encoder.write_var_u64(self.start)?;
        encoder.write_var_u64(len)?;
        Ok(())
    }
}
impl<R: CrdtReader> CrdtRead<R> for OrderRange {
    /// Reads a count-prefixed list of ranges: a count of exactly 1 decodes
    /// into the compact `Range` variant, anything else into `Fragment`
    /// (a count of 0 yields an empty `Fragment`).
    fn read(decoder: &mut R) -> JwstCodecResult<Self> {
        let num_of_deletes = decoder.read_var_u64()? as usize;
        if num_of_deletes == 1 {
            Ok(OrderRange::Range(Range::<u64>::read(decoder)?))
        } else {
            // NOTE(review): capacity comes straight from untrusted input;
            // consider capping it like HASHMAP_SAFE_CAPACITY elsewhere.
            let mut deletes = VecDeque::with_capacity(num_of_deletes);
            for _ in 0..num_of_deletes {
                deletes.push_back(Range::<u64>::read(decoder)?);
            }
            Ok(OrderRange::Fragment(deletes))
        }
    }
}
impl<W: CrdtWriter> CrdtWrite<W> for OrderRange {
    /// Writes the range count followed by each range.
    ///
    /// A single-element `Fragment` is encoded identically to `Range` and will
    /// decode back as `Range`; the two representations are wire-equivalent.
    fn write(&self, encoder: &mut W) -> JwstCodecResult {
        match self {
            OrderRange::Range(range) => {
                encoder.write_var_u64(1)?;
                range.write(encoder)?;
            }
            OrderRange::Fragment(ranges) => {
                encoder.write_var_u64(ranges.len() as u64)?;
                for range in ranges {
                    range.write(encoder)?;
                }
            }
        }
        Ok(())
    }
}
/// Per-client set of deleted clock ranges (the yjs "delete set").
#[derive(Debug, Default, Clone, PartialEq)]
pub struct DeleteSet(pub ClientMap<OrderRange>);
impl Deref for DeleteSet {
type Target = ClientMap<OrderRange>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<const N: usize> From<[(Client, Vec<Range<u64>>); N]> for DeleteSet {
fn from(value: [(Client, Vec<Range<u64>>); N]) -> Self {
let mut map = ClientMap::with_capacity(N);
for (client, ranges) in value {
map.insert(client, ranges.into());
}
Self(map)
}
}
impl DerefMut for DeleteSet {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl DeleteSet {
    /// Records the deletion of `len` clock ticks starting at `from` for `client`.
    pub fn add(&mut self, client: Client, from: Clock, len: Clock) {
        self.add_range(client, from..from + len);
    }

    /// Inserts a single deleted range, merging it into the client's existing
    /// ranges.
    pub fn add_range(&mut self, client: Client, range: Range<u64>) {
        match self.0.entry(client) {
            Entry::Occupied(e) => {
                let r = e.into_mut();
                if r.is_empty() {
                    // replace an empty placeholder rather than pushing into it
                    *r = range.into();
                } else {
                    r.push(range);
                }
            }
            Entry::Vacant(e) => {
                e.insert(range.into());
            }
        }
    }

    /// Adds several deleted ranges for one client at once.
    pub fn batch_add_ranges(&mut self, client: Client, ranges: Vec<Range<u64>>) {
        match self.0.entry(client) {
            Entry::Occupied(e) => {
                e.into_mut().extend(ranges);
            }
            Entry::Vacant(e) => {
                e.insert(ranges.into());
            }
        }
    }

    /// Merges every client's ranges from `other` into `self`.
    pub fn merge(&mut self, other: &Self) {
        for (client, range) in &other.0 {
            match self.0.entry(*client) {
                Entry::Occupied(e) => {
                    e.into_mut().merge(range.clone());
                }
                Entry::Vacant(e) => {
                    e.insert(range.clone());
                }
            }
        }
    }
}
impl<R: CrdtReader> CrdtRead<R> for DeleteSet {
    /// Decodes a client-count-prefixed map of `client -> OrderRange`.
    fn read(decoder: &mut R) -> JwstCodecResult<Self> {
        let num_of_clients = decoder.read_var_u64()? as usize;
        // See: [HASHMAP_SAFE_CAPACITY]
        let mut map = ClientMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));
        for _ in 0..num_of_clients {
            let client = decoder.read_var_u64()?;
            let deletes = OrderRange::read(decoder)?;
            map.insert(client, deletes);
        }
        // release any over-reservation from the capped pre-allocation
        map.shrink_to_fit();
        Ok(DeleteSet(map))
    }
}
impl<W: CrdtWriter> CrdtWrite<W> for DeleteSet {
    /// Encodes the client count, then each client id with its ranges.
    /// Clients are emitted in descending id order for a deterministic encoding.
    fn write(&self, encoder: &mut W) -> JwstCodecResult {
        encoder.write_var_u64(self.len() as u64)?;
        let mut clients = self.keys().copied().collect::<Vec<_>>();
        // Descending
        clients.sort_by(|a, b| b.cmp(a));
        for client in clients {
            encoder.write_var_u64(client)?;
            // key came from `self.keys()`, so the lookup cannot fail
            self.get(&client).unwrap().write(encoder)?;
        }
        Ok(())
    }
}
#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
    use super::*;

    // checks that added ranges merge with pre-existing ones per client
    #[test]
    fn test_delete_set_add() {
        let delete_set = DeleteSet::from([
            (1, vec![0..10, 20..30]),
            (2, vec![0..5, 10..20]),
            (3, vec![15..20, 30..35]),
            (4, vec![0..10]),
        ]);
        {
            let mut delete_set = delete_set.clone();
            // 5..30 bridges the gap, collapsing to a single range
            delete_set.add(1, 5, 25);
            assert_eq!(delete_set.get(&1), Some(&OrderRange::Range(0..30)));
        }
        {
            let mut delete_set = delete_set;
            // 5..15 only extends the first range; the gap remains
            delete_set.add(1, 5, 10);
            assert_eq!(
                delete_set.get(&1),
                Some(&OrderRange::from(vec![0..15, 20..30]))
            );
        }
    }

    // same merging semantics via batch_add_ranges
    #[test]
    fn test_delete_set_batch_push() {
        let delete_set = DeleteSet::from([
            (1, vec![0..10, 20..30]),
            (2, vec![0..5, 10..20]),
            (3, vec![15..20, 30..35]),
            (4, vec![0..10]),
        ]);
        {
            let mut delete_set = delete_set.clone();
            delete_set.batch_add_ranges(1, vec![0..5, 10..20]);
            assert_eq!(delete_set.get(&1), Some(&OrderRange::Range(0..30)));
        }
        {
            let mut delete_set = delete_set;
            delete_set.batch_add_ranges(1, vec![40..50, 10..20]);
            assert_eq!(
                delete_set.get(&1),
                Some(&OrderRange::from(vec![0..30, 40..50]))
            );
        }
    }

    // binary round-trip through the raw encoder/decoder
    #[test]
    fn test_encode_decode() {
        let delete_set = DeleteSet::from([(1, vec![0..10, 20..30]), (2, vec![0..5, 10..20])]);
        let mut encoder = RawEncoder::default();
        delete_set.write(&mut encoder).unwrap();
        let update = encoder.into_inner();
        let mut decoder = RawDecoder::new(&update);
        let decoded = DeleteSet::read(&mut decoder).unwrap();
        assert_eq!(delete_set, decoded);
    }
}

View File

@@ -0,0 +1,68 @@
use std::{
fmt::Display,
hash::Hash,
ops::{Add, Sub},
};
/// Unique identifier of a collaborating actor (the yjs client id).
pub type Client = u64;
/// Logical clock value within a single client's operation sequence.
pub type Clock = u64;
/// Globally unique identifier of a CRDT operation: a `(client, clock)` pair.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)]
#[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub struct Id {
    pub client: Client,
    pub clock: Clock,
}
impl Id {
pub fn new(client: Client, clock: Clock) -> Self {
Self { client, clock }
}
}
impl From<(Client, Clock)> for Id {
fn from((client, clock): (Client, Clock)) -> Self {
Id::new(client, clock)
}
}
impl Sub<Clock> for Id {
type Output = Id;
fn sub(self, rhs: Clock) -> Self::Output {
(self.client, self.clock - rhs).into()
}
}
impl Add<Clock> for Id {
type Output = Id;
fn add(self, rhs: Clock) -> Self::Output {
(self.client, self.clock + rhs).into()
}
}
impl Display for Id {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "({}, {})", self.client, self.clock)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // covers equality across clients and clock arithmetic via Add/Sub
    #[test]
    fn basic_id_operation() {
        let id_with_different_client_1 = Id::new(1, 1);
        let id_with_different_client_2 = Id::new(2, 1);
        assert_ne!(id_with_different_client_1, id_with_different_client_2);
        assert_eq!(Id::new(1, 1), Id::new(1, 1));
        let clock = 2;
        assert_eq!(Id::new(1, 1) + clock, (1, 3).into());
        assert_eq!(Id::new(1, 3) - clock, (1, 1).into());
    }
}

View File

@@ -0,0 +1,296 @@
use std::io::Cursor;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use super::*;
/// Runs a `nom`-style parser `f` over the unread portion of `buffer`,
/// advancing the cursor by exactly the number of bytes `f` consumed.
#[inline]
pub fn read_with_cursor<T, F>(buffer: &mut Cursor<&[u8]>, f: F) -> JwstCodecResult<T>
where
    F: FnOnce(&[u8]) -> IResult<&[u8], T>,
{
    // TODO: use remaining_slice() instead after it is stabilized
    let input = buffer.get_ref();
    // clamp: the cursor position may have been set past the end of the buffer
    let rest_pos = buffer.position().min(input.len() as u64) as usize;
    let input = &input[rest_pos..];
    // replace the borrowed input slice in the error with just its length,
    // so the returned error does not borrow from the buffer
    let (tail, result) = f(input).map_err(|e| e.map_input(|u| u.len()))?;
    buffer.set_position((rest_pos + input.len() - tail.len()) as u64);
    Ok(result)
}
// compatible with ydoc v1
/// Binary decoder reading from a borrowed byte slice.
#[derive(Clone)]
pub struct RawDecoder<'b> {
    pub(super) buffer: Cursor<&'b [u8]>,
}
impl<'b> RawDecoder<'b> {
    /// Creates a decoder over `buffer`, starting at offset 0.
    pub fn new(buffer: &'b [u8]) -> Self {
        Self {
            buffer: Cursor::new(buffer),
        }
    }

    /// Returns the not-yet-consumed tail of the underlying buffer.
    pub fn rest_ref(&self) -> &[u8] {
        let buf = self.buffer.get_ref();
        // `Cursor::set_position` can move past the end; clamp to stay in bounds
        // (slicing from 0 on an untouched cursor just returns the whole buffer)
        let pos = (self.buffer.position() as usize).min(buf.len());
        &buf[pos..]
    }

    /// Consumes the decoder and returns the not-yet-consumed tail.
    pub fn drain(self) -> &'b [u8] {
        let pos = self.buffer.position() as usize;
        let buf = self.buffer.into_inner();
        // clamp like `rest_ref`: a cursor positioned past the end would
        // otherwise make the slice below panic
        &buf[pos.min(buf.len())..]
    }
}
impl CrdtReader for RawDecoder<'_> {
    /// True when the cursor has consumed the whole buffer.
    fn is_empty(&self) -> bool {
        self.buffer.position() >= self.buffer.get_ref().len() as u64
    }

    /// Number of unread bytes remaining.
    // NOTE(review): underflows if the cursor position was pushed past the
    // end of the buffer — callers appear to keep it in bounds; confirm.
    fn len(&self) -> u64 {
        self.buffer.get_ref().len() as u64 - self.buffer.position()
    }

    // variable-length (LEB128-style) primitives, parsed via the nom helpers
    fn read_var_u64(&mut self) -> JwstCodecResult<u64> {
        read_with_cursor(&mut self.buffer, read_var_u64)
    }

    fn read_var_i32(&mut self) -> JwstCodecResult<i32> {
        read_with_cursor(&mut self.buffer, read_var_i32)
    }

    fn read_var_string(&mut self) -> JwstCodecResult<String> {
        read_with_cursor(&mut self.buffer, read_var_string)
    }

    fn read_var_buffer(&mut self) -> JwstCodecResult<Vec<u8>> {
        read_with_cursor(&mut self.buffer, |i| {
            read_var_buffer(i).map(|(tail, val)| (tail, val.to_vec()))
        })
    }

    fn read_u8(&mut self) -> JwstCodecResult<u8> {
        self.buffer.read_u8().map_err(reader::map_read_error)
    }

    // fixed-width big-endian primitives
    fn read_f32_be(&mut self) -> JwstCodecResult<f32> {
        self
            .buffer
            .read_f32::<BigEndian>()
            .map_err(reader::map_read_error)
    }

    fn read_f64_be(&mut self) -> JwstCodecResult<f64> {
        self
            .buffer
            .read_f64::<BigEndian>()
            .map_err(reader::map_read_error)
    }

    fn read_i64_be(&mut self) -> JwstCodecResult<i64> {
        self
            .buffer
            .read_i64::<BigEndian>()
            .map_err(reader::map_read_error)
    }

    /// In the v1 format an info flag is a plain byte.
    #[inline(always)]
    fn read_info(&mut self) -> JwstCodecResult<u8> {
        self.read_u8()
    }

    /// An item id is encoded as two var-ints: client, then clock.
    #[inline(always)]
    fn read_item_id(&mut self) -> JwstCodecResult<Id> {
        let client = self.read_var_u64()?;
        let clock = self.read_var_u64()?;
        Ok(Id::new(client, clock))
    }
}
// compatible with ydoc v1
/// Binary encoder producing the Yjs v1 update wire format into an
/// in-memory buffer.
#[derive(Default)]
pub struct RawEncoder {
    buffer: Cursor<Vec<u8>>,
}

impl RawEncoder {
    /// Consumes the encoder and returns all bytes written so far.
    pub fn into_inner(self) -> Vec<u8> {
        self.buffer.into_inner()
    }
}
/// Yjs v1 wire-format writer: var-ints are LEB128-style, multi-byte
/// numerics are big-endian, all errors are mapped to the codec error type.
impl CrdtWriter for RawEncoder {
    fn write_var_u64(&mut self, num: u64) -> JwstCodecResult {
        write_var_u64(&mut self.buffer, num).map_err(writer::map_write_error)
    }

    fn write_var_i32(&mut self, num: i32) -> JwstCodecResult {
        write_var_i32(&mut self.buffer, num).map_err(writer::map_write_error)
    }

    fn write_var_string<S: AsRef<str>>(&mut self, s: S) -> JwstCodecResult {
        write_var_string(&mut self.buffer, s).map_err(writer::map_write_error)
    }

    fn write_var_buffer(&mut self, buf: &[u8]) -> JwstCodecResult {
        write_var_buffer(&mut self.buffer, buf).map_err(writer::map_write_error)
    }

    fn write_u8(&mut self, num: u8) -> JwstCodecResult {
        self.buffer.write_u8(num).map_err(writer::map_write_error)
    }

    fn write_f32_be(&mut self, num: f32) -> JwstCodecResult {
        self.buffer.write_f32::<BigEndian>(num).map_err(writer::map_write_error)
    }

    fn write_f64_be(&mut self, num: f64) -> JwstCodecResult {
        self.buffer.write_f64::<BigEndian>(num).map_err(writer::map_write_error)
    }

    fn write_i64_be(&mut self, num: i64) -> JwstCodecResult {
        self.buffer.write_i64::<BigEndian>(num).map_err(writer::map_write_error)
    }

    /// The info byte is written verbatim.
    #[inline(always)]
    fn write_info(&mut self, num: u8) -> JwstCodecResult {
        self.write_u8(num)
    }

    /// An [`Id`] is written as two var-uints: client then clock.
    #[inline(always)]
    fn write_item_id(&mut self, id: &Id) -> JwstCodecResult {
        self.write_var_u64(id.client)?;
        self.write_var_u64(id.clock)?;

        Ok(())
    }
}
#[cfg(test)]
#[allow(clippy::approx_constant)]
mod tests {
    use super::*;

    // round-trips known byte patterns through the decoder
    #[test]
    fn test_crdt_reader() {
        {
            // var-uint: [0xf2, 0x05] decodes to 754
            let mut reader = RawDecoder::new(&[0xf2, 0x5]);
            assert_eq!(reader.read_var_u64().unwrap(), 754);
        }
        {
            // the same length-prefixed payload can be read as a string,
            // a buffer, or byte-by-byte
            let mut reader = RawDecoder::new(&[0x5, b'h', b'e', b'l', b'l', b'o']);
            assert_eq!(reader.clone().read_var_string().unwrap(), "hello");
            assert_eq!(
                reader.clone().read_var_buffer().unwrap().as_slice(),
                b"hello"
            );
            assert_eq!(reader.read_u8().unwrap(), 5);
            assert_eq!(reader.read_u8().unwrap(), b'h');
            assert_eq!(reader.read_u8().unwrap(), b'e');
            assert_eq!(reader.read_u8().unwrap(), b'l');
            assert_eq!(reader.read_u8().unwrap(), b'l');
            assert_eq!(reader.read_u8().unwrap(), b'o');
        }
        {
            // big-endian IEEE-754 f32 for pi
            let mut reader = RawDecoder::new(&[0x40, 0x49, 0x0f, 0xdb]);
            assert_eq!(reader.read_f32_be().unwrap(), 3.1415927);
        }
        {
            // big-endian IEEE-754 f64 for pi
            let mut reader = RawDecoder::new(&[0x40, 0x09, 0x21, 0xfb, 0x54, 0x44, 0x2d, 0x18]);
            assert_eq!(reader.read_f64_be().unwrap(), 3.141592653589793);
        }
        {
            let mut reader = RawDecoder::new(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
            assert_eq!(reader.read_i64_be().unwrap(), i64::MAX);
        }
        {
            let mut reader = RawDecoder::new(&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
            assert_eq!(reader.read_i64_be().unwrap(), i64::MIN);
        }
    }

    // mirrors test_crdt_reader: encoding must produce the same byte patterns
    #[test]
    fn test_crdt_writer() {
        {
            let mut writer = RawEncoder::default();
            writer.write_var_u64(754).unwrap();
            assert_eq!(writer.into_inner(), vec![0xf2, 0x5]);
        }
        {
            // string and buffer encodings share the same length-prefixed form
            let ret = vec![0x5, b'h', b'e', b'l', b'l', b'o'];
            let mut writer = RawEncoder::default();
            writer.write_var_string("hello").unwrap();
            assert_eq!(writer.into_inner(), ret);
            let mut writer = RawEncoder::default();
            writer.write_var_buffer(b"hello").unwrap();
            assert_eq!(writer.into_inner(), ret);
            let mut writer = RawEncoder::default();
            writer.write_u8(5).unwrap();
            writer.write_u8(b'h').unwrap();
            writer.write_u8(b'e').unwrap();
            writer.write_u8(b'l').unwrap();
            writer.write_u8(b'l').unwrap();
            writer.write_u8(b'o').unwrap();
            assert_eq!(writer.into_inner(), ret);
        }
        {
            let mut writer = RawEncoder::default();
            writer.write_f32_be(3.1415927).unwrap();
            assert_eq!(writer.into_inner(), vec![0x40, 0x49, 0x0f, 0xdb]);
        }
        {
            let mut writer = RawEncoder::default();
            writer.write_f64_be(3.141592653589793).unwrap();
            assert_eq!(
                writer.into_inner(),
                vec![0x40, 0x09, 0x21, 0xfb, 0x54, 0x44, 0x2d, 0x18]
            );
        }
        {
            let mut writer = RawEncoder::default();
            writer.write_i64_be(i64::MAX).unwrap();
            assert_eq!(
                writer.into_inner(),
                vec![0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]
            );
        }
        {
            let mut writer = RawEncoder::default();
            writer.write_i64_be(i64::MIN).unwrap();
            assert_eq!(
                writer.into_inner(),
                vec![0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
            );
        }
        {
            // info byte is written verbatim
            let mut writer = RawEncoder::default();
            writer.write_info(0x80).unwrap();
            assert_eq!(writer.into_inner(), vec![0x80]);
        }
        {
            // id is client var-uint followed by clock var-uint
            let mut writer = RawEncoder::default();
            writer.write_item_id(&Id::new(1, 2)).unwrap();
            assert_eq!(writer.into_inner(), vec![0x1, 0x2]);
        }
    }
}

View File

@@ -0,0 +1,9 @@
mod codec_v1;
mod reader;
mod writer;
pub use codec_v1::{RawDecoder, RawEncoder};
pub use reader::{CrdtRead, CrdtReader};
pub use writer::{CrdtWrite, CrdtWriter};
use super::*;

View File

@@ -0,0 +1,30 @@
use std::io::Error;
use super::*;
/// Converts a raw `std::io` read failure into the codec's error type,
/// treating any short/failed read as an incomplete document.
#[inline]
pub fn map_read_error(e: Error) -> JwstCodecError {
    JwstCodecError::IncompleteDocument(e.to_string())
}
/// Low-level read primitives for the Yjs binary format: var-ints,
/// length-prefixed strings/buffers, and big-endian numerics.
pub trait CrdtReader {
    /// Whether all input has been consumed.
    fn is_empty(&self) -> bool;
    /// Remaining unread bytes.
    fn len(&self) -> u64;
    fn read_var_u64(&mut self) -> JwstCodecResult<u64>;
    fn read_var_i32(&mut self) -> JwstCodecResult<i32>;
    fn read_var_string(&mut self) -> JwstCodecResult<String>;
    fn read_var_buffer(&mut self) -> JwstCodecResult<Vec<u8>>;
    fn read_u8(&mut self) -> JwstCodecResult<u8>;
    fn read_f32_be(&mut self) -> JwstCodecResult<f32>;
    fn read_f64_be(&mut self) -> JwstCodecResult<f64>;
    fn read_i64_be(&mut self) -> JwstCodecResult<i64>;
    /// Reads an item's info byte.
    fn read_info(&mut self) -> JwstCodecResult<u8>;
    /// Reads an item id (client, clock).
    fn read_item_id(&mut self) -> JwstCodecResult<Id>;
}
/// A value that can be decoded from any [`CrdtReader`].
pub trait CrdtRead<R: CrdtReader> {
    fn read(reader: &mut R) -> JwstCodecResult<Self>
    where
        Self: Sized;
}

View File

@@ -0,0 +1,28 @@
use std::io::Error;
use super::*;
/// Converts a raw `std::io` write failure into the codec's error type.
#[inline]
pub fn map_write_error(e: Error) -> JwstCodecError {
    JwstCodecError::InvalidWriteBuffer(e.to_string())
}
/// Low-level write primitives for the Yjs binary format, mirroring
/// [`CrdtReader`].
pub trait CrdtWriter {
    fn write_var_u64(&mut self, num: u64) -> JwstCodecResult;
    fn write_var_i32(&mut self, num: i32) -> JwstCodecResult;
    fn write_var_string<S: AsRef<str>>(&mut self, s: S) -> JwstCodecResult;
    fn write_var_buffer(&mut self, buf: &[u8]) -> JwstCodecResult;
    fn write_u8(&mut self, num: u8) -> JwstCodecResult;
    fn write_f32_be(&mut self, num: f32) -> JwstCodecResult;
    fn write_f64_be(&mut self, num: f64) -> JwstCodecResult;
    fn write_i64_be(&mut self, num: i64) -> JwstCodecResult;
    /// Writes an item's info byte.
    fn write_info(&mut self, num: u8) -> JwstCodecResult;
    /// Writes an item id (client, clock).
    fn write_item_id(&mut self, id: &Id) -> JwstCodecResult;
}
/// A value that can be encoded to any [`CrdtWriter`].
pub trait CrdtWrite<W: CrdtWriter> {
    fn write(&self, writer: &mut W) -> JwstCodecResult
    where
        Self: Sized;
}

View File

@@ -0,0 +1,427 @@
use super::*;
/// Where an [`Item`] lives in the document tree: a resolved type reference,
/// a root-type name, or a parent identified only by an item [`Id`].
#[derive(Debug, Clone)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub(crate) enum Parent {
    // resolved pointer to the parent shared type; skipped in proptest
    #[cfg_attr(test, proptest(skip))]
    Type(YTypeRef),
    String(SmolStr),
    // parent referenced by an item id
    Id(Id),
}
/// A single CRDT item: one contiguous run of content plus the linkage
/// (origin ids, neighbour refs, parent) used during integration.
#[derive(Clone)]
#[cfg_attr(all(test, not(loom)), derive(proptest_derive::Arbitrary))]
pub(crate) struct Item {
    pub id: Id,
    // id of the item this was originally inserted after/before
    pub origin_left_id: Option<Id>,
    pub origin_right_id: Option<Id>,
    // runtime neighbour refs; not generated in proptest
    #[cfg_attr(all(test, not(loom)), proptest(value = "Somr::none()"))]
    pub left: ItemRef,
    #[cfg_attr(all(test, not(loom)), proptest(value = "Somr::none()"))]
    pub right: ItemRef,
    pub parent: Option<Parent>,
    // map key when the parent is a map-like type
    #[cfg_attr(all(test, not(loom)), proptest(value = "Option::<SmolStr>::None"))]
    pub parent_sub: Option<SmolStr>,
    pub content: Content,
    // interior-mutable status bits (keep/countable/deleted)
    #[cfg_attr(all(test, not(loom)), proptest(value = "ItemFlag::default()"))]
    pub flags: ItemFlag,
}

// make all Item readonly
pub(crate) type ItemRef = Somr<Item>;
impl PartialEq for Item {
    // items are identified solely by their id; content is not compared
    fn eq(&self, other: &Self) -> bool {
        self.id == other.id
    }
}
impl std::fmt::Debug for Item {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut ds = f.debug_struct("Item");
        ds.field("id", &self.id)
            .field("origin_left_id", &self.origin_left_id)
            .field("origin_right_id", &self.origin_right_id);

        // neighbours are shown by id, and only while their refs are alive
        if let Some(item) = self.left.get() {
            ds.field("left", &item.id);
        }
        if let Some(item) = self.right.get() {
            ds.field("right", &item.id);
        }

        // summarize the parent without recursing into the type tree
        let parent_repr = self.parent.as_ref().map(|p| match p {
            Parent::Type(_) => "[Type]".to_string(),
            Parent::String(name) => format!("Parent({name})"),
            Parent::Id(id) => format!("({}, {})", id.client, id.clock),
        });

        ds.field("parent", &parent_repr)
            .field("parent_sub", &self.parent_sub)
            .field("content", &self.content)
            .field("flags", &self.flags)
            .finish()
    }
}
impl std::fmt::Display for Item {
    // compact one-line form used by the debug print helpers
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Item{}: [{:?}]", self.id, self.content)
    }
}
impl Default for Item {
    // a detached placeholder: no neighbours, no parent, zero-length
    // deleted content, all flags cleared
    fn default() -> Self {
        Self {
            id: Id::default(),
            origin_left_id: None,
            origin_right_id: None,
            left: Somr::none(),
            right: Somr::none(),
            parent: None,
            parent_sub: None,
            content: Content::Deleted(0),
            flags: ItemFlag::from(0),
        }
    }
}
impl Item {
    /// Builds an item, deriving its origin ids from the given neighbours and
    /// the countable flag from the content.
    pub fn new(
        id: Id,
        content: Content,
        left: Somr<Item>,
        right: Somr<Item>,
        parent: Option<Parent>,
        parent_sub: Option<SmolStr>,
    ) -> Self {
        // only countable content contributes to length/index queries
        let flags = ItemFlag::from(if content.countable() {
            item_flags::ITEM_COUNTABLE
        } else {
            0
        });

        Self {
            id,
            origin_left_id: left.get().map(|left| left.last_id()),
            left,
            origin_right_id: right.get().map(|right| right.id),
            right,
            parent,
            parent_sub,
            content,
            flags,
        }
    }

    // find a node that has parent info
    // in crdt tree, not all nodes have parent info
    // so we need to check left and right node if they have parent info
    pub fn find_node_with_parent_info(&self) -> Option<Item> {
        if self.parent.is_some() {
            return Some(self.clone());
        } else if let Some(item) = self.left.get() {
            if item.parent.is_none() {
                // left also lacks parent info; fall back to its right
                // neighbour — NOTE(review): confirm this is the intended
                // candidate rather than our own right neighbour
                if let Some(item) = item.right.get() {
                    return Some(item.clone());
                }
            } else {
                return Some(item.clone());
            }
        } else if let Some(item) = self.right.get() {
            return Some(item.clone());
        }

        None
    }

    /// Number of CRDT clock ticks this item spans.
    pub fn len(&self) -> u64 {
        self.content.clock_len()
    }

    pub fn deleted(&self) -> bool {
        self.flags.deleted()
    }

    /// Marks the item deleted; returns `false` if it already was.
    pub fn delete(&self) -> bool {
        if self.deleted() {
            return false;
        }

        self.flags.set_deleted();

        true
    }

    pub fn countable(&self) -> bool {
        self.flags.countable()
    }

    pub fn keep(&self) -> bool {
        self.flags.keep()
    }

    /// Only countable, non-deleted items contribute to indexing.
    pub fn indexable(&self) -> bool {
        self.countable() && !self.deleted()
    }

    /// Id of the last clock tick covered by this item.
    pub fn last_id(&self) -> Id {
        let Id { client, clock } = self.id;

        Id::new(client, clock + self.len() - 1)
    }

    /// Splits the item into two halves at `offset` (0 < offset < len).
    ///
    /// The caller is responsible for wiring `left <-> node <-> right`
    /// pointers afterwards.
    pub fn split_at(&self, offset: u64) -> JwstCodecResult<(Self, Self)> {
        debug_assert!(offset > 0 && self.len() > 1 && offset < self.len());
        let id = self.id;
        let right_id = Id::new(id.client, id.clock + offset);
        let (left_content, right_content) = self.content.split(offset)?;

        let left_item = Item::new(
            id,
            left_content,
            // let caller connect left <-> node <-> right
            Somr::none(),
            Somr::none(),
            self.parent.clone(),
            self.parent_sub.clone(),
        );

        let right_item = Item::new(
            right_id,
            right_content,
            // let caller connect left <-> node <-> right
            Somr::none(),
            Somr::none(),
            self.parent.clone(),
            self.parent_sub.clone(),
        );

        // both halves inherit the status flags of the item being split.
        // (the previous code tested the freshly created `left_item`, whose
        // flags are always unset, so deleted/keep state was silently lost)
        if self.deleted() {
            left_item.flags.set_deleted();
            right_item.flags.set_deleted();
        }
        if self.keep() {
            left_item.flags.set_keep();
            right_item.flags.set_keep();
        }

        Ok((left_item, right_item))
    }

    /// Builds the v1 info byte: content tag in the low bits plus the
    /// origin/parent_sub presence flags in the high bits.
    fn get_info(&self) -> u8 {
        let mut info = self.content.get_info();

        if self.origin_left_id.is_some() {
            info |= item_flags::ITEM_HAS_LEFT_ID;
        }
        if self.origin_right_id.is_some() {
            info |= item_flags::ITEM_HAS_RIGHT_ID;
        }
        if self.parent_sub.is_some() {
            info |= item_flags::ITEM_HAS_PARENT_SUB;
        }

        info
    }

    /// Structural invariant: either parent info is present and no origin ids,
    /// or origin ids are present and parent info is derived from siblings.
    pub fn is_valid(&self) -> bool {
        let has_id = self.origin_left_id.is_some() || self.origin_right_id.is_some();
        (!has_id && self.parent.is_some())
            || (has_id && self.parent.is_none() && self.parent_sub.is_none())
    }

    /// Decodes an item body; the info byte was already consumed by the
    /// caller and is passed in along with its low 5 content-tag bits.
    pub fn read<R: CrdtReader>(
        decoder: &mut R,
        id: Id,
        info: u8,
        first_5_bit: u8,
    ) -> JwstCodecResult<Self> {
        let flags: ItemFlag = info.into();
        let has_left_id = flags.check(item_flags::ITEM_HAS_LEFT_ID);
        let has_right_id = flags.check(item_flags::ITEM_HAS_RIGHT_ID);
        let has_parent_sub = flags.check(item_flags::ITEM_HAS_PARENT_SUB);
        let has_not_sibling = flags.not(item_flags::ITEM_HAS_SIBLING);

        // NOTE: read order must keep the same as the order in yjs
        // TODO: this data structure design will break the cpu OOE, need to be optimized
        let item = Self {
            id,
            origin_left_id: if has_left_id {
                Some(decoder.read_item_id()?)
            } else {
                None
            },
            origin_right_id: if has_right_id {
                Some(decoder.read_item_id()?)
            } else {
                None
            },
            // parent info is only encoded when no origin id is present
            parent: {
                if has_not_sibling {
                    let has_parent = decoder.read_var_u64()? == 1;
                    Some(if has_parent {
                        Parent::String(SmolStr::new(decoder.read_var_string()?))
                    } else {
                        Parent::Id(decoder.read_item_id()?)
                    })
                } else {
                    None
                }
            },
            parent_sub: if has_not_sibling && has_parent_sub {
                Some(SmolStr::new(decoder.read_var_string()?))
            } else {
                None
            },
            content: {
                // tag must not GC or Skip, this must process in parse_struct
                debug_assert_ne!(first_5_bit, 0);
                debug_assert_ne!(first_5_bit, 10);
                Content::read(decoder, first_5_bit)?
            },
            left: Somr::none(),
            right: Somr::none(),
            flags: ItemFlag::from(0),
        };

        // derive runtime flags from the decoded content
        if item.content.countable() {
            item.flags.set_countable();
        }

        if matches!(item.content, Content::Deleted(_)) {
            item.flags.set_deleted();
        }

        debug_assert!(item.is_valid());

        Ok(item)
    }

    /// Encodes the item in v1 format; the inverse of [`Item::read`].
    ///
    /// Returns [`JwstCodecError::ParentNotFound`] if parent info is required
    /// (no origin ids) but missing.
    pub fn write<W: CrdtWriter>(&self, encoder: &mut W) -> JwstCodecResult {
        let info = self.get_info();
        let has_not_sibling = info & item_flags::ITEM_HAS_SIBLING == 0;

        encoder.write_info(info)?;

        if let Some(left_id) = self.origin_left_id {
            encoder.write_item_id(&left_id)?;
        }
        if let Some(right_id) = self.origin_right_id {
            encoder.write_item_id(&right_id)?;
        }

        if has_not_sibling {
            if let Some(parent) = &self.parent {
                match parent {
                    // 1 = parent named by string, 0 = parent named by id
                    Parent::String(s) => {
                        encoder.write_var_u64(1)?;
                        encoder.write_var_string(s)?;
                    }
                    Parent::Id(id) => {
                        encoder.write_var_u64(0)?;
                        encoder.write_item_id(id)?;
                    }
                    // resolved type refs are lowered to id or root name
                    Parent::Type(ty) => {
                        if let Some(ty) = ty.ty() {
                            if let Some(item) = ty.item.get() {
                                encoder.write_var_u64(0)?;
                                encoder.write_item_id(&item.id)?;
                            } else if let Some(name) = &ty.root_name {
                                encoder.write_var_u64(1)?;
                                encoder.write_var_string(name)?;
                            }
                        }
                    }
                }
            } else {
                // if item delete, it must not exists in crdt state tree
                debug_assert!(!self.deleted());
                return Err(JwstCodecError::ParentNotFound);
            }

            if let Some(parent_sub) = &self.parent_sub {
                encoder.write_var_string(parent_sub)?;
            }
        }

        self.content.write(encoder)?;

        Ok(())
    }
}
// Debug-only helpers for dumping the sibling chain around an item.
// NOTE(review): `cfg(debug)` checks a raw `--cfg debug` flag, not the
// crate's `debug` Cargo feature (`feature = "debug"`) — confirm which is
// intended, as the feature alone will not enable this block.
#[allow(dead_code)]
#[cfg(any(debug, test))]
impl Item {
    /// Prints the chain of left siblings, oldest first, ending at `self`.
    pub fn print_left(&self) {
        let mut ret = vec![format!("Self{}: [{:?}]", self.id, self.content)];
        let mut left: Somr<Item> = self.left.clone();

        while let Some(item) = left.get() {
            ret.push(format!("{item}"));
            left = item.left.clone();
        }
        // collected right-to-left, so reverse for display order
        ret.reverse();

        println!("{}", ret.join(" <- "));
    }

    /// Prints `self` followed by the chain of right siblings.
    pub fn print_right(&self) {
        let mut ret = vec![format!("Self{}: [{:?}]", self.id, self.content)];
        let mut right = self.right.clone();

        while let Some(item) = right.get() {
            ret.push(format!("{item}"));
            right = item.right.clone();
        }

        println!("{}", ret.join(" -> "));
    }
}
#[cfg(test)]
mod tests {
    #[cfg(not(loom))]
    use proptest::{collection::vec, prelude::*};

    #[cfg(not(loom))]
    use super::*;

    // encode an arbitrary item and check it decodes back to an equal item
    #[cfg(not(loom))]
    fn item_round_trip(item: &mut Item) -> JwstCodecResult {
        if !item.is_valid() {
            // invalid combinations can't be encoded; skip them
            return Ok(());
        }

        if item.content.countable() {
            item.flags.set_countable();
        }

        let mut encoder = RawEncoder::default();
        item.write(&mut encoder)?;

        let update = encoder.into_inner();
        let mut decoder = RawDecoder::new(&update);

        let info = decoder.read_info()?;
        let first_5_bit = info & 0b11111;
        let decoded_item = Item::read(&mut decoder, item.id, info, first_5_bit)?;

        assert_eq!(item, &decoded_item);

        Ok(())
    }

    #[cfg(not(loom))]
    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]
        fn test_random_content(mut items in vec(any::<Item>(), 0..10)) {
            for item in &mut items {
                item_round_trip(item).unwrap();
            }
        }
    }
}

View File

@@ -0,0 +1,170 @@
use std::sync::atomic::{AtomicU8, Ordering};
/// Bit masks stored in [`ItemFlag`]; the `HAS_*` bits mirror the Yjs v1
/// item info byte, the low bits track runtime status.
#[rustfmt::skip]
#[allow(dead_code)]
pub mod item_flags {
    pub const ITEM_KEEP           : u8 = 0b0000_0001;
    pub const ITEM_COUNTABLE      : u8 = 0b0000_0010;
    pub const ITEM_DELETED        : u8 = 0b0000_0100;
    pub const ITEM_MARKED         : u8 = 0b0000_1000;
    pub const ITEM_HAS_PARENT_SUB : u8 = 0b0010_0000;
    pub const ITEM_HAS_RIGHT_ID   : u8 = 0b0100_0000;
    pub const ITEM_HAS_LEFT_ID    : u8 = 0b1000_0000;
    // union of the two origin-id bits
    pub const ITEM_HAS_SIBLING    : u8 = 0b1100_0000;
}

/// Thread-safe bag of item status bits behind interior mutability.
#[derive(Debug)]
pub struct ItemFlag(pub(self) AtomicU8);

impl Default for ItemFlag {
    fn default() -> Self {
        Self::from(0)
    }
}

impl Clone for ItemFlag {
    // snapshot the current bits; the clone does not share the atomic
    fn clone(&self) -> Self {
        Self::from(self.0.load(Ordering::Acquire))
    }
}

impl From<u8> for ItemFlag {
    fn from(flags: u8) -> Self {
        Self(AtomicU8::new(flags))
    }
}

#[allow(dead_code)]
impl ItemFlag {
    /// Sets every bit in `flag`.
    #[inline(always)]
    pub fn set(&self, flag: u8) {
        self.0.fetch_or(flag, Ordering::SeqCst);
    }

    /// Clears every bit in `flag`.
    #[inline(always)]
    pub fn clear(&self, flag: u8) {
        self.0.fetch_and(!flag, Ordering::SeqCst);
    }

    /// True when ALL bits of `flag` are set.
    #[inline(always)]
    pub fn check(&self, flag: u8) -> bool {
        (self.0.load(Ordering::Acquire) & flag) == flag
    }

    /// True when NONE of the bits of `flag` are set.
    /// (Not the negation of `check` for multi-bit masks.)
    #[inline(always)]
    pub fn not(&self, flag: u8) -> bool {
        (self.0.load(Ordering::Acquire) & flag) == 0
    }

    #[inline(always)]
    pub fn keep(&self) -> bool {
        self.check(item_flags::ITEM_KEEP)
    }

    #[inline(always)]
    pub fn set_keep(&self) {
        self.set(item_flags::ITEM_KEEP);
    }

    #[inline(always)]
    pub fn clear_keep(&self) {
        self.clear(item_flags::ITEM_KEEP);
    }

    #[inline(always)]
    pub fn countable(&self) -> bool {
        self.check(item_flags::ITEM_COUNTABLE)
    }

    #[inline(always)]
    pub fn set_countable(&self) {
        self.set(item_flags::ITEM_COUNTABLE);
    }

    #[inline(always)]
    pub fn clear_countable(&self) {
        self.clear(item_flags::ITEM_COUNTABLE);
    }

    #[inline(always)]
    pub fn deleted(&self) -> bool {
        self.check(item_flags::ITEM_DELETED)
    }

    #[inline(always)]
    pub fn set_deleted(&self) {
        self.set(item_flags::ITEM_DELETED);
    }

    #[inline(always)]
    pub fn clear_deleted(&self) {
        self.clear(item_flags::ITEM_DELETED);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // each flag must toggle independently and leave the byte back at its
    // default state after a set/clear cycle
    #[test]
    fn test_flag_set_and_clear() {
        {
            let flag = super::ItemFlag::default();
            assert!(!flag.keep());
            flag.set_keep();
            assert!(flag.keep());
            flag.clear_keep();
            assert!(!flag.keep());
            assert_eq!(
                flag.0.load(Ordering::SeqCst),
                ItemFlag::default().0.load(Ordering::SeqCst)
            );
        }
        {
            let flag = super::ItemFlag::default();
            assert!(!flag.countable());
            flag.set_countable();
            assert!(flag.countable());
            flag.clear_countable();
            assert!(!flag.countable());
            assert_eq!(
                flag.0.load(Ordering::SeqCst),
                ItemFlag::default().0.load(Ordering::SeqCst)
            );
        }
        {
            let flag = super::ItemFlag::default();
            assert!(!flag.deleted());
            flag.set_deleted();
            assert!(flag.deleted());
            flag.clear_deleted();
            assert!(!flag.deleted());
            assert_eq!(
                flag.0.load(Ordering::SeqCst),
                ItemFlag::default().0.load(Ordering::SeqCst)
            );
        }
        {
            // all three flags combined: setting and clearing must not
            // interfere with each other
            let flag = super::ItemFlag::default();
            flag.set_keep();
            flag.set_countable();
            flag.set_deleted();
            assert!(flag.keep());
            assert!(flag.countable());
            assert!(flag.deleted());
            flag.clear_keep();
            flag.clear_countable();
            flag.clear_deleted();
            assert!(!flag.keep());
            assert!(!flag.countable());
            assert!(!flag.deleted());
            assert_eq!(
                flag.0.load(Ordering::SeqCst),
                ItemFlag::default().0.load(Ordering::SeqCst)
            );
        }
    }
}

View File

@@ -0,0 +1,25 @@
mod any;
mod content;
mod delete_set;
mod id;
mod io;
mod item;
mod item_flag;
mod refs;
mod update;
#[cfg(test)]
mod utils;
pub use any::Any;
pub(crate) use content::Content;
pub use delete_set::DeleteSet;
pub use id::{Client, Clock, Id};
pub use io::{CrdtRead, CrdtReader, CrdtWrite, CrdtWriter, RawDecoder, RawEncoder};
pub(crate) use item::{Item, ItemRef, Parent};
pub(crate) use item_flag::{item_flags, ItemFlag};
pub(crate) use refs::Node;
pub use update::Update;
#[cfg(test)]
pub(crate) use utils::*;
use super::*;

View File

@@ -0,0 +1,480 @@
use super::*;
// make fields Copy + Clone without much effort
// make fields Copy + Clone without much effort
/// One decoded struct from an update: a garbage-collected range, a skip
/// placeholder, or a live item.
#[derive(Debug, Clone)]
#[cfg_attr(all(test, not(loom)), derive(proptest_derive::Arbitrary))]
pub(crate) enum Node {
    GC(Box<NodeLen>),
    Skip(Box<NodeLen>),
    Item(ItemRef),
}
/// Simple representation of id and len struct used by GC and Skip node.
#[derive(Debug, Clone)]
#[cfg_attr(all(test, not(loom)), derive(proptest_derive::Arbitrary))]
pub(crate) struct NodeLen {
    // id of the first clock tick covered by this range
    pub id: Id,
    // number of clock ticks covered
    pub len: u64,
}
impl<W: CrdtWriter> CrdtWrite<W> for Node {
    fn write(&self, writer: &mut W) -> JwstCodecResult {
        match self {
            // GC nodes carry info tag 0 plus their length
            Node::GC(item) => {
                writer.write_info(0)?;
                writer.write_var_u64(item.len)
            }
            // Skip nodes carry info tag 10 plus their length
            Node::Skip(item) => {
                writer.write_info(10)?;
                writer.write_var_u64(item.len)
            }
            // NOTE(review): panics if the item ref is dead — assumes callers
            // only write live items; confirm
            Node::Item(item) => item.get().unwrap().write(writer),
        }
    }
}
impl PartialEq for Node {
    // GC/Skip nodes compare by id only; `len` is intentionally ignored.
    // NOTE(review): confirm callers never rely on length-sensitive equality.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Node::GC(left), Node::GC(right)) => left.id == right.id,
            (Node::Skip(left), Node::Skip(right)) => left.id == right.id,
            (Node::Item(item1), Node::Item(item2)) => item1.get() == item2.get(),
            _ => false,
        }
    }
}
impl Eq for Node {
    // manual no-op impl instead of `derive(Eq)`: the contained `Item`
    // only provides `PartialEq`
    fn assert_receiver_is_total_eq(&self) {}
}
impl From<Item> for Node {
    // wraps an owned item in a fresh `Somr` ref
    fn from(value: Item) -> Self {
        Self::Item(Somr::new(value))
    }
}
impl Node {
    pub fn new_skip(id: Id, len: u64) -> Self {
        Self::Skip(Box::new(NodeLen { id, len }))
    }

    pub fn new_gc(id: Id, len: u64) -> Self {
        Self::GC(Box::new(NodeLen { id, len }))
    }

    /// Decodes one struct: info tag 0 = GC, 10 = Skip, anything else = item.
    pub fn read<R: CrdtReader>(decoder: &mut R, id: Id) -> JwstCodecResult<Self> {
        let info = decoder.read_info()?;
        // low 5 bits of the info byte carry the content tag
        let first_5_bit = info & 0b11111;

        match first_5_bit {
            0 => {
                let len = decoder.read_var_u64()?;
                Ok(Node::new_gc(id, len))
            }
            10 => {
                let len = decoder.read_var_u64()?;
                Ok(Node::new_skip(id, len))
            }
            _ => {
                let item = Somr::new(Item::read(decoder, id, info, first_5_bit)?);

                // back-link a decoded type content to the item that owns it
                if let Content::Type(ty) = &item.get().unwrap().content {
                    if let Some(mut ty) = ty.ty_mut() {
                        ty.item = item.clone();
                    }
                }

                Ok(Node::Item(item))
            }
        }
    }

    pub fn id(&self) -> Id {
        match self {
            Node::GC(item) => item.id,
            Node::Skip(item) => item.id,
            // SAFETY(review): assumes the item ref is still alive —
            // presumably upheld by the doc store; TODO confirm
            Node::Item(item) => unsafe { item.get_unchecked() }.id,
        }
    }

    pub fn client(&self) -> Client {
        self.id().client
    }

    pub fn clock(&self) -> Clock {
        self.id().clock
    }

    /// Number of clock ticks this node spans.
    pub fn len(&self) -> u64 {
        match self {
            Self::GC(item) => item.len,
            Self::Skip(item) => item.len,
            // SAFETY(review): same aliveness assumption as `id()`
            Self::Item(item) => unsafe { item.get_unchecked() }.len(),
        }
    }

    pub fn is_gc(&self) -> bool {
        matches!(self, Self::GC { .. })
    }

    pub fn is_skip(&self) -> bool {
        matches!(self, Self::Skip { .. })
    }

    pub fn is_item(&self) -> bool {
        matches!(self, Self::Item(_))
    }

    /// Returns the inner item ref, or an empty ref for GC/Skip nodes.
    pub fn as_item(&self) -> Somr<Item> {
        if let Self::Item(item) = self {
            item.clone()
        } else {
            Somr::none()
        }
    }

    /// Left neighbour wrapped as a node; `None` for GC/Skip or dead refs.
    pub fn left(&self) -> Option<Self> {
        if let Node::Item(item) = self {
            item.get().map(|item| Node::Item(item.left.clone()))
        } else {
            None
        }
    }

    /// Right neighbour wrapped as a node; `None` for GC/Skip or dead refs.
    pub fn right(&self) -> Option<Self> {
        if let Node::Item(item) = self {
            item.get().map(|item| Node::Item(item.right.clone()))
        } else {
            None
        }
    }

    /// Walks left until the first non-item node or chain start.
    pub fn head(&self) -> Self {
        let mut cur = self.clone();

        while let Some(left) = cur.left() {
            if left.is_item() {
                cur = left
            } else {
                break;
            }
        }

        cur
    }

    /// Walks right until the last item in the chain.
    #[allow(dead_code)]
    pub fn tail(&self) -> Self {
        let mut cur = self.clone();

        while let Some(right) = cur.right() {
            if right.is_item() {
                cur = right
            } else {
                break;
            }
        }

        cur
    }

    /// Status flags of the underlying item; GC/Skip nodes report a
    /// snapshot with only the deleted bit set.
    pub fn flags(&self) -> ItemFlag {
        if let Node::Item(item) = self {
            item.get().unwrap().flags.clone()
        } else {
            // deleted
            ItemFlag::from(4)
        }
    }

    pub fn last_id(&self) -> Option<Id> {
        if let Node::Item(item) = self {
            item.get().map(|item| item.last_id())
        } else {
            None
        }
    }

    /// Splits an item node at `offset`; GC/Skip nodes cannot be split.
    ///
    /// The caller is responsible for reconnecting neighbour pointers.
    pub fn split_at(&self, offset: u64) -> JwstCodecResult<(Self, Self)> {
        if let Self::Item(item) = self {
            let item = item.get().unwrap();
            debug_assert!(offset > 0 && item.len() > 1 && offset < item.len());
            let id = item.id;
            let right_id = Id::new(id.client, id.clock + offset);
            let (left_content, right_content) = item.content.split(offset)?;

            let left_item = Somr::new(Item::new(
                id,
                left_content,
                // let caller connect left <-> node <-> right
                Somr::none(),
                Somr::none(),
                item.parent.clone(),
                item.parent_sub.clone(),
            ));

            let right_item = Somr::new(Item::new(
                right_id,
                right_content,
                // let caller connect left <-> node <-> right
                Somr::none(),
                Somr::none(),
                item.parent.clone(),
                item.parent_sub.clone(),
            ));

            Ok((Self::Item(left_item), Self::Item(right_item)))
        } else {
            Err(JwstCodecError::ItemSplitNotSupport)
        }
    }

    #[inline]
    #[allow(dead_code)]
    pub fn countable(&self) -> bool {
        self.flags().countable()
    }

    #[inline]
    pub fn deleted(&self) -> bool {
        self.flags().deleted()
    }

    /// Tries to absorb `right` into `self`; returns `false` (leaving both
    /// unchanged) when the two nodes are not mergeable.
    pub fn merge(&mut self, right: Self) -> bool {
        match (self, right) {
            (Node::GC(left), Node::GC(right)) => {
                left.len += right.len;
            }
            (Node::Skip(left), Node::Skip(right)) => {
                left.len += right.len;
            }
            (Node::Item(lref), Node::Item(rref)) => {
                // SAFETY(review): exclusive access assumed while merging;
                // presumably guaranteed by the doc store's locking — confirm
                let mut litem = unsafe { lref.get_mut_unchecked() };
                let mut ritem = unsafe { rref.get_mut_unchecked() };
                let llen = litem.len();

                // items are only mergeable when they form one contiguous,
                // same-status, same-origin run
                if litem.id.client != ritem.id.client
                    // not same delete status
                    || litem.deleted() != ritem.deleted()
                    // not clock continuous
                    || litem.id.clock + litem.len() != ritem.id.clock
                    // not insertion continuous
                    || Some(litem.last_id()) != ritem.origin_left_id
                    // not insertion continuous
                    || litem.origin_right_id != ritem.origin_right_id
                    // not runtime continuous
                    || litem.right != rref
                {
                    return false;
                }

                match (&mut litem.content, &mut ritem.content) {
                    (Content::Deleted(l), Content::Deleted(r)) => {
                        *l += *r;
                    }
                    (Content::Json(l), Content::Json(r)) => {
                        l.extend(r.drain(0..));
                    }
                    (Content::String(l), Content::String(r)) => {
                        *l += r;
                    }
                    (Content::Any(l), Content::Any(r)) => {
                        l.extend(r.drain(0..));
                    }
                    _ => {
                        // other content kinds never merge
                        return false;
                    }
                }

                // fix up any search markers that pointed at the right item
                if let Some(Parent::Type(p)) = &litem.parent {
                    if let Some(parent) = p.ty_mut() {
                        if let Some(markers) = &parent.markers {
                            markers.replace_marker(rref.clone(), lref.clone(), -(llen as i64));
                        }
                    }
                }

                if ritem.keep() {
                    litem.flags.set_keep()
                }

                // splice the right item out of the linked list
                litem.right = ritem.right.clone();
                unsafe {
                    if litem.right.is_some() {
                        litem.right.get_mut_unchecked().left = lref.clone();
                    }
                }
            }
            _ => {
                return false;
            }
        }

        true
    }
}
impl From<Option<Node>> for Somr<Item> {
    /// Extracts the item ref; `None` and non-item nodes become an empty ref.
    fn from(value: Option<Node>) -> Self {
        value.map_or_else(Somr::none, |n| n.as_item())
    }
}
impl From<&Option<Node>> for Somr<Item> {
    /// Borrowing variant of the conversion above.
    fn from(value: &Option<Node>) -> Self {
        value.as_ref().map_or_else(Somr::none, |n| n.as_item())
    }
}
impl From<Option<&Node>> for Somr<Item> {
fn from(value: Option<&Node>) -> Self {
match value {
Some(n) => n.as_item(),
None => Somr::none(),
}
}
}
#[cfg(test)]
mod tests {
    #[cfg(not(loom))]
    use proptest::{collection::vec, prelude::*};

    use super::{utils::ItemBuilder, *};

    // basic accessors for each node kind
    #[test]
    fn test_struct_info() {
        loom_model!({
            {
                let struct_info = Node::new_gc(Id::new(1, 0), 10);
                assert_eq!(struct_info.len(), 10);
                assert_eq!(struct_info.client(), 1);
                assert_eq!(struct_info.clock(), 0);
            }
            {
                let struct_info = Node::new_skip(Id::new(2, 0), 20);
                assert_eq!(struct_info.len(), 20);
                assert_eq!(struct_info.client(), 2);
                assert_eq!(struct_info.clock(), 0);
            }
            {
                // item length comes from its content ("content" = 7 chars)
                let item = ItemBuilder::new()
                    .id((3, 0).into())
                    .left_id(None)
                    .right_id(None)
                    .parent(Some(Parent::String(SmolStr::new_inline("parent"))))
                    .parent_sub(None)
                    .content(Content::String(String::from("content")))
                    .build();
                let struct_info = Node::Item(Somr::new(item));
                assert_eq!(struct_info.len(), 7);
                assert_eq!(struct_info.client(), 3);
                assert_eq!(struct_info.clock(), 0);
            }
        });
    }

    // encode/decode round trip for each encodable node shape
    #[test]
    fn test_read_write_struct_info() {
        loom_model!({
            let has_not_parent_id_and_has_parent = Node::Item(Somr::new(
                ItemBuilder::new()
                    .id((0, 0).into())
                    .left_id(None)
                    .right_id(None)
                    .parent(Some(Parent::String(SmolStr::new_inline("parent"))))
                    .parent_sub(None)
                    .content(Content::String(String::from("content")))
                    .build(),
            ));

            let has_not_parent_id_and_has_parent_with_key = Node::Item(Somr::new(
                ItemBuilder::new()
                    .id((0, 0).into())
                    .left_id(None)
                    .right_id(None)
                    .parent(Some(Parent::String(SmolStr::new_inline("parent"))))
                    .parent_sub(Some(SmolStr::new_inline("parent_sub")))
                    .content(Content::String(String::from("content")))
                    .build(),
            ));

            let has_parent_id = Node::Item(Somr::new(
                ItemBuilder::new()
                    .id((0, 0).into())
                    .left_id(Some((1, 2).into()))
                    .right_id(Some((2, 5).into()))
                    .parent(None)
                    .parent_sub(None)
                    .content(Content::String(String::from("content")))
                    .build(),
            ));

            let struct_infos = vec![
                Node::new_gc((0, 0).into(), 42),
                Node::new_skip((0, 0).into(), 314),
                has_not_parent_id_and_has_parent,
                has_not_parent_id_and_has_parent_with_key,
                has_parent_id,
            ];

            for info in struct_infos {
                let mut encoder = RawEncoder::default();
                info.write(&mut encoder).unwrap();

                let update = encoder.into_inner();
                let mut decoder = RawDecoder::new(&update);
                let decoded = Node::read(&mut decoder, info.id()).unwrap();

                assert_eq!(info, decoded);
            }
        });
    }

    // property-test helper: arbitrary nodes must survive a write/read cycle
    #[cfg(not(loom))]
    fn struct_info_round_trip(info: &mut Node) -> JwstCodecResult {
        if let Node::Item(item) = info {
            if let Some(item) = item.get_mut() {
                if !item.is_valid() {
                    // invalid combinations can't be encoded; skip them
                    return Ok(());
                }

                if item.content.countable() {
                    item.flags.set_countable();
                }
            }
        }

        let mut encoder = RawEncoder::default();
        info.write(&mut encoder)?;

        let ret = encoder.into_inner();
        let mut decoder = RawDecoder::new(&ret);

        let decoded = Node::read(&mut decoder, info.id())?;

        assert_eq!(info, &decoded);

        Ok(())
    }

    #[cfg(not(loom))]
    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]
        fn test_random_struct_info(mut infos in vec(any::<Node>(), 0..10)) {
            for info in &mut infos {
                struct_info_round_trip(info).unwrap();
            }
        }
    }
}

View File

@@ -0,0 +1,721 @@
use std::{collections::VecDeque, ops::Range};
use super::*;
use crate::doc::StateVector;
/// A decoded Yjs update: per-client struct sequences plus deletions,
/// along with the state left over from partially applicable updates.
#[derive(Debug, Default, Clone)]
pub struct Update {
    // decoded nodes grouped by client, in ascending clock order
    pub(crate) structs: ClientMap<VecDeque<Node>>,
    // deletions carried by this update
    pub(crate) delete_set: DeleteSet,
    /// all unapplicable items that we can't integrate into doc
    /// any item with inconsistent id clock or missing dependency will be put
    /// here
    pub(crate) pending_structs: ClientMap<VecDeque<Node>>,
    /// missing state vector after applying updates
    pub(crate) missing_state: StateVector,
    /// all unapplicable delete set
    pub(crate) pending_delete_set: DeleteSet,
}
impl<R: CrdtReader> CrdtRead<R> for Update {
    /// Decodes a full v1 update and rejects any trailing bytes.
    fn read(decoder: &mut R) -> JwstCodecResult<Self> {
        let num_of_clients = decoder.read_var_u64()? as usize;

        // See: [HASHMAP_SAFE_CAPACITY]
        // capacity is clamped so a corrupted length prefix cannot trigger
        // a huge up-front allocation
        let mut map = ClientMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));
        for _ in 0..num_of_clients {
            let num_of_structs = decoder.read_var_u64()? as usize;
            let client = decoder.read_var_u64()?;
            // clock of the first struct; the rest follow contiguously
            let mut clock = decoder.read_var_u64()?;

            // same reason as above
            let mut structs = VecDeque::with_capacity(num_of_structs.min(HASHMAP_SAFE_CAPACITY));

            for _ in 0..num_of_structs {
                let struct_info = Node::read(decoder, Id::new(client, clock))?;
                clock += struct_info.len();
                structs.push_back(struct_info);
            }
            structs.shrink_to_fit();

            map.insert(client, structs);
        }
        map.shrink_to_fit();

        let delete_set = DeleteSet::read(decoder)?;

        // trailing bytes mean the update is malformed
        if !decoder.is_empty() {
            return Err(JwstCodecError::UpdateNotFullyConsumed(
                decoder.len() as usize
            ));
        }

        Ok(Update {
            structs: map,
            delete_set,
            ..Update::default()
        })
    }
}
impl<W: CrdtWriter> CrdtWrite<W> for Update {
    /// Encodes the update in v1 format; the inverse of `read`.
    fn write(&self, encoder: &mut W) -> JwstCodecResult {
        encoder.write_var_u64(self.structs.len() as u64)?;

        let mut clients = self.structs.keys().copied().collect::<Vec<_>>();
        // Descending
        clients.sort_by(|a, b| b.cmp(a));

        for client in clients {
            let structs = self.structs.get(&client).unwrap();
            encoder.write_var_u64(structs.len() as u64)?;
            encoder.write_var_u64(client)?;
            // only the first clock is written; readers derive the rest
            // from each struct's length
            encoder.write_var_u64(structs.front().map(|s| s.clock()).unwrap_or(0))?;

            for struct_info in structs {
                struct_info.write(encoder)?;
            }
        }

        self.delete_set.write(encoder)?;

        Ok(())
    }
}
impl Update {
    /// Decode an update from the ydoc v1 binary format.
    pub fn decode_v1<T: AsRef<[u8]>>(buffer: T) -> JwstCodecResult<Update> {
        Update::read(&mut RawDecoder::new(buffer.as_ref()))
    }

    /// Encode this update into the ydoc v1 binary format.
    pub fn encode_v1(&self) -> JwstCodecResult<Vec<u8>> {
        let mut encoder = RawEncoder::default();
        self.write(&mut encoder)?;
        Ok(encoder.into_inner())
    }

    /// Iterate all applicable structs given the store's current state vector.
    pub(crate) fn iter(&mut self, state: StateVector) -> UpdateIterator {
        UpdateIterator::new(self, state)
    }

    /// Iterate all applicable delete ranges given the store's state vector.
    pub fn delete_set_iter(&mut self, state: StateVector) -> DeleteSetIterator {
        DeleteSetIterator::new(self, state)
    }

    // take all pending structs and delete set to [self] update struct
    pub fn drain_pending_state(&mut self) {
        debug_assert!(self.is_empty());
        std::mem::swap(&mut self.pending_structs, &mut self.structs);
        std::mem::swap(&mut self.pending_delete_set, &mut self.delete_set);
    }

    /// Merge several updates into a single normalized update.
    pub fn merge<I: IntoIterator<Item = Update>>(updates: I) -> Update {
        let mut merged = Update::default();
        Self::merge_into(&mut merged, updates);
        merged
    }

    /// Merge `updates` into `target`.
    ///
    /// All non-skip structs are collected per client, each client queue is
    /// sorted by clock, clock gaps are refilled with [Node::new_skip] and
    /// directly mergeable neighbor nodes are pruned.
    pub fn merge_into<I: IntoIterator<Item = Update>>(target: &mut Update, updates: I) {
        for update in updates {
            target.delete_set.merge(&update.delete_set);
            for (client, structs) in update.structs {
                // skips are regenerated below, no need to carry them over
                let iter = structs.into_iter().filter(|p| !p.is_skip());
                if let Some(merged_structs) = target.structs.get_mut(&client) {
                    merged_structs.extend(iter);
                } else {
                    target.structs.insert(client, iter.collect());
                }
            }
        }

        for structs in target.structs.values_mut() {
            structs.make_contiguous().sort_by_key(|s| s.id().clock);

            // insert [Node::Skip] if structs[index].id().clock + structs[index].len() <
            // structs[index + 1].id().clock
            let mut index = 0;
            let mut merged_index = vec![];
            // BUGFIX: use `index + 1 < len` instead of `index < len - 1`.
            // The latter underflows (panic in debug builds, wrap-around plus
            // out-of-bounds indexing in release) when a client's queue ends up
            // empty, which happens when an update contained only skip nodes
            // for that client.
            while index + 1 < structs.len() {
                let cur = &structs[index];
                let next = &structs[index + 1];

                let clock_end = cur.id().clock + cur.len();
                let next_clock = next.id().clock;

                if next_clock > clock_end {
                    // fill the clock gap with an explicit skip node
                    structs.insert(
                        index + 1,
                        Node::new_skip((cur.id().client, clock_end).into(), next_clock - clock_end),
                    );
                    // step over the freshly inserted skip
                    index += 1;
                } else if cur.id().clock == next_clock {
                    if cur.deleted() == next.deleted()
                        && cur.last_id() == next.last_id()
                        && cur.left() == next.left()
                        && cur.right() == next.right()
                    {
                        // merge two nodes, mark the index
                        merged_index.push(index + 1);
                    } else {
                        debug!("merge failed: {:?} {:?}", cur, next)
                    }
                }
                index += 1;
            }

            {
                // prune the merged nodes
                let mut new_structs = VecDeque::with_capacity(structs.len() - merged_index.len());
                let mut next_remove_idx = 0;
                for (idx, val) in structs.drain(..).enumerate() {
                    if next_remove_idx < merged_index.len() && idx == merged_index[next_remove_idx] {
                        next_remove_idx += 1;
                    } else {
                        new_structs.push_back(val);
                    }
                }
                structs.extend(new_structs);
            }
        }
    }

    /// True when no structs are present (the delete set may still be non-empty).
    pub fn is_content_empty(&self) -> bool {
        self.structs.is_empty()
    }

    /// True when both structs and delete set are empty.
    pub fn is_empty(&self) -> bool {
        self.structs.is_empty() && self.delete_set.is_empty()
    }

    /// True when no pending (not yet applicable) state is held.
    pub fn is_pending_empty(&self) -> bool {
        self.pending_structs.is_empty() && self.pending_delete_set.is_empty()
    }
}
/// Stateful iterator yielding an [Update]'s applicable structs in causal
/// order, together with the clock offset the local store already covers.
pub(crate) struct UpdateIterator<'a> {
    /// the update being drained; unapplicable items land in its pending state
    update: &'a mut Update,
    // --- local iterator state ---
    /// current state vector from store
    state: StateVector,
    /// all client ids sorted ascending
    client_ids: Vec<Client>,
    /// current id of client of the updates we're processing
    cur_client_id: Option<Client>,
    /// stack of previous iterating item with higher priority than updates in
    /// next iteration
    stack: Vec<Node>,
}
impl<'a> UpdateIterator<'a> {
    /// Build an iterator over `update`, starting from the store's `state`.
    pub fn new(update: &'a mut Update, state: StateVector) -> Self {
        let mut client_ids = update.structs.keys().cloned().collect::<Vec<_>>();
        client_ids.sort();
        // clients are consumed from the largest id downwards
        let cur_client_id = client_ids.pop();

        UpdateIterator {
            update,
            state,
            client_ids,
            cur_client_id,
            stack: Vec::new(),
        }
    }

    /// iterate the client ids until we find the next client with left updates
    /// that can be consumed
    ///
    /// note:
    /// firstly we will check current client id as well to ensure current
    /// updates queue is not empty yet
    fn next_client(&mut self) -> Option<Client> {
        while let Some(client_id) = self.cur_client_id {
            match self.update.structs.get(&client_id) {
                Some(refs) if !refs.is_empty() => {
                    self.cur_client_id.replace(client_id);
                    return self.cur_client_id;
                }
                _ => {
                    // exhausted client: drop its empty queue and move on
                    self.update.structs.remove(&client_id);
                    self.cur_client_id = self.client_ids.pop();
                }
            }
        }

        None
    }

    /// update the missing state vector
    /// tell it the smallest clock that missed.
    fn update_missing_state(&mut self, client: Client, clock: Clock) {
        self.update.missing_state.set_min(client, clock);
    }

    /// any time we can't apply an update during the iteration,
    /// we should put all items in pending stack to rest structs
    fn add_stack_to_rest(&mut self) {
        for s in self.stack.drain(..) {
            let client = s.id().client;
            // everything still queued for this client becomes pending as well
            let unapplicable_items = self.update.structs.remove(&client);
            if let Some(mut items) = unapplicable_items {
                items.push_front(s);
                self.update.pending_structs.insert(client, items);
            } else {
                self.update.pending_structs.insert(client, [s].into());
            }
            // this client will not be visited again in this iteration
            self.client_ids.retain(|&c| c != client);
        }
    }

    /// tell if current update's dependencies(left, right, parent) has already
    /// been consumed and recorded and return the client of them if not.
    fn get_missing_dep(&self, struct_info: &Node) -> Option<Client> {
        if let Some(item) = struct_info.as_item().get() {
            let id = item.id;
            // only cross-client origins can be missing: same-client deps are
            // already ordered by clock inside the client's own queue
            if let Some(left) = &item.origin_left_id {
                if left.client != id.client && left.clock >= self.state.get(&left.client) {
                    return Some(left.client);
                }
            }

            if let Some(right) = &item.origin_right_id {
                if right.client != id.client && right.clock >= self.state.get(&right.client) {
                    return Some(right.client);
                }
            }

            if let Some(parent) = &item.parent {
                match parent {
                    Parent::Id(parent_id)
                        if parent_id.client != id.client
                            && parent_id.clock >= self.state.get(&parent_id.client) =>
                    {
                        return Some(parent_id.client);
                    }
                    _ => {}
                }
            }
        }

        None
    }

    /// Pop the next node to inspect: stacked items have priority, then the
    /// queue of the current client.
    fn next_candidate(&mut self) -> Option<Node> {
        let mut cur = None;

        if !self.stack.is_empty() {
            cur.replace(self.stack.pop().unwrap());
        } else if let Some(client) = self.next_client() {
            // Safety:
            // client index of updates and update length are both checked in next_client
            // safe to use unwrap
            cur.replace(
                self
                    .update
                    .structs
                    .get_mut(&client)
                    .unwrap()
                    .pop_front()
                    .unwrap(),
            );
        }

        cur
    }
}
impl Iterator for UpdateIterator<'_> {
    /// `(node, offset)` — `offset` is how much of the node's clock span the
    /// local store has already seen.
    type Item = (Node, u64);

    fn next(&mut self) -> Option<Self::Item> {
        // fetch the first candidate from stack or updates
        let mut cur = self.next_candidate();

        while let Some(cur_update) = cur.take() {
            let id = cur_update.id();
            if cur_update.is_skip() {
                // skips carry no content, fetch the next candidate
                cur = self.next_candidate();
                continue;
            } else if !self.state.contains(&id) {
                // missing local state of same client
                // can't apply the continuous updates from same client
                // push into the stack and put tell all the items in stack are unapplicable
                self.stack.push(cur_update);
                // NOTE(review): relies on id.clock > 0 in this branch (a
                // clock-0 id is presumably always contained by the state
                // vector, otherwise this subtraction underflows) — confirm
                self.update_missing_state(id.client, id.clock - 1);
                self.add_stack_to_rest();
            } else {
                let id = cur_update.id();
                let dep = self.get_missing_dep(&cur_update);
                // some dependency is missing, we need to turn to iterate the dependency first.
                if let Some(dep) = dep {
                    self.stack.push(cur_update);
                    match self.update.structs.get_mut(&dep) {
                        Some(updates) if !updates.is_empty() => {
                            // iterate the dependency client first
                            cur.replace(updates.pop_front().unwrap());
                            continue;
                        }
                        // but the dependency update is drained
                        // need to move all stack item to unapplicable store
                        _ => {
                            self.update_missing_state(dep, self.state.get(&dep));
                            self.add_stack_to_rest();
                        }
                    }
                } else {
                    // we finally find the first applicable update
                    let local_state = self.state.get(&id.client);
                    // we've already check the local state is greater or equal to current update's
                    // clock so offset here will never be negative
                    let offset = local_state - id.clock;
                    // nodes entirely covered by local state are silently dropped
                    if offset == 0 || offset < cur_update.len() {
                        self.state.set_max(id.client, id.clock + cur_update.len());
                        return Some((cur_update, offset));
                    }
                }
            }
            cur = self.next_candidate();
        }

        // we all done
        None
    }
}
/// Iterator yielding the applicable delete ranges per client; ranges beyond
/// the local state vector are deferred into the update's pending delete set.
pub struct DeleteSetIterator<'a> {
    update: &'a mut Update,
    /// current state vector from store
    state: StateVector,
}

impl<'a> DeleteSetIterator<'a> {
    pub fn new(update: &'a mut Update, state: StateVector) -> Self {
        DeleteSetIterator { update, state }
    }
}
impl Iterator for DeleteSetIterator<'_> {
    type Item = (Client, Range<u64>);

    fn next(&mut self) -> Option<Self::Item> {
        // drain clients one at a time; a client is removed once its ranges
        // are exhausted
        while let Some(client) = self.update.delete_set.keys().next().cloned() {
            let deletes = self.update.delete_set.get_mut(&client).unwrap();
            let local_state = self.state.get(&client);
            while let Some(range) = deletes.pop() {
                let start = range.start;
                let end = range.end;
                if start < local_state {
                    if local_state < end {
                        // partially state missing
                        // [start..end)
                        //          ^ local_state in between
                        // // split
                        // [start..local_state) [local_state..end)
                        //                      ^^^^^ unapplicable
                        self
                            .update
                            .pending_delete_set
                            .add(client, local_state, end - local_state);
                        return Some((client, start..local_state));
                    }
                    // fully applicable range
                    return Some((client, range));
                } else {
                    // all state missing
                    self
                        .update
                        .pending_delete_set
                        .add(client, start, end - start);
                }
            }
            self.update.delete_set.remove(&client);
        }

        None
    }
}
#[cfg(test)]
mod tests {
    use std::{num::ParseIntError, path::PathBuf};

    use serde::Deserialize;

    use super::*;
    use crate::doc::common::OrderRange;

    /// Build a countable string item of `len` chars at `id`.
    fn struct_item(id: (Client, Clock), len: usize) -> Node {
        Node::Item(Somr::new(
            ItemBuilder::new()
                .id(id.into())
                .content(Content::String("c".repeat(len)))
                .build(),
        ))
    }

    fn parse_doc_update(input: Vec<u8>) -> JwstCodecResult<Update> {
        Update::decode_v1(input)
    }

    #[test]
    #[cfg_attr(any(miri, loom), ignore)]
    fn test_parse_doc() {
        // (fixture bytes, expected client count, expected total struct count)
        let docs = [
            (include_bytes!("../../fixtures/basic.bin").to_vec(), 1, 188),
            (
                include_bytes!("../../fixtures/database.bin").to_vec(),
                1,
                149,
            ),
            (include_bytes!("../../fixtures/large.bin").to_vec(), 1, 9036),
            (
                include_bytes!("../../fixtures/with-subdoc.bin").to_vec(),
                2,
                30,
            ),
            (
                include_bytes!("../../fixtures/edge-case-left-right-same-node.bin").to_vec(),
                2,
                243,
            ),
        ];

        for (doc, clients, structs) in docs {
            let update = parse_doc_update(doc).unwrap();

            assert_eq!(update.structs.len(), clients);
            assert_eq!(
                update.structs.iter().map(|s| s.1.len()).sum::<usize>(),
                structs
            );
        }
    }

    /// Decode a hex string into bytes, two characters per byte.
    fn decode_hex(s: &str) -> Result<Vec<u8>, ParseIntError> {
        (0..s.len())
            .step_by(2)
            .map(|i| u8::from_str_radix(&s[i..i + 2], 16))
            .collect()
    }

    #[allow(dead_code)]
    #[derive(Deserialize, Debug)]
    struct Data {
        id: u64,
        workspace: String,
        timestamp: String,
        blob: String,
    }

    #[ignore = "just for local data test"]
    #[test]
    fn test_parse_local_doc() {
        let json =
            serde_json::from_slice::<Vec<Data>>(include_bytes!("../../fixtures/local_docs.json"))
                .unwrap();

        for ws in json {
            // strip the literal wrapper around the hex payload
            let data = &ws.blob[5..=(ws.blob.len() - 2)];
            if let Ok(data) = decode_hex(data) {
                match parse_doc_update(data.clone()) {
                    Ok(update) => {
                        println!(
                            "workspace: {}, global structs: {}, total structs: {}",
                            ws.workspace,
                            update.structs.len(),
                            update.structs.iter().map(|s| s.1.len()).sum::<usize>()
                        );
                    }
                    Err(_e) => {
                        // dump undecodable docs for offline inspection
                        std::fs::write(
                            PathBuf::from("./src/fixtures/invalid").join(format!("{}.ydoc", ws.workspace)),
                            data,
                        )
                        .unwrap();
                        println!("doc error: {}", ws.workspace);
                    }
                }
            } else {
                println!("error origin data: {}", ws.workspace);
            }
        }
    }

    #[test]
    fn test_update_iterator() {
        loom_model!({
            let mut update = Update {
                structs: ClientMap::from_iter([
                    (
                        0,
                        VecDeque::from([
                            struct_item((0, 0), 1),
                            struct_item((0, 1), 1),
                            Node::new_skip((0, 2).into(), 1),
                        ]),
                    ),
                    (
                        1,
                        VecDeque::from([
                            struct_item((1, 0), 1),
                            Node::Item(Somr::new(
                                ItemBuilder::new()
                                    .id((1, 1).into())
                                    .left_id(Some((0, 1).into()))
                                    .content(Content::String("c".repeat(2)))
                                    .build(),
                            )),
                        ]),
                    ),
                ]),
                ..Update::default()
            };

            let mut iter = update.iter(StateVector::default());
            // client 1 is visited first, but (1, 1) depends on (0, 1), so the
            // iterator interleaves client 0 before finishing client 1
            assert_eq!(iter.next().unwrap().0.id(), (1, 0).into());
            assert_eq!(iter.next().unwrap().0.id(), (0, 0).into());
            assert_eq!(iter.next().unwrap().0.id(), (0, 1).into());
            assert_eq!(iter.next().unwrap().0.id(), (1, 1).into());
            assert_eq!(iter.next(), None);
        });
    }

    #[test]
    fn test_update_iterator_with_missing_state() {
        loom_model!({
            let mut update = Update {
                // an item with higher sequence id than local state
                structs: ClientMap::from_iter([(0, VecDeque::from([struct_item((0, 4), 1)]))]),
                ..Update::default()
            };

            let mut iter = update.iter(StateVector::from([(0, 3)]));
            assert_eq!(iter.next(), None);
            assert!(!update.pending_structs.is_empty());
            assert_eq!(
                update
                    .pending_structs
                    .get_mut(&0)
                    .unwrap()
                    .pop_front()
                    .unwrap()
                    .id(),
                (0, 4).into()
            );
            assert!(!update.missing_state.is_empty());
            assert_eq!(update.missing_state.get(&0), 3);
        });
    }

    #[test]
    fn test_delete_set_iterator() {
        let mut update = Update {
            delete_set: DeleteSet::from([(0, vec![(0..2), (3..5)])]),
            ..Update::default()
        };

        let mut iter = update.delete_set_iter(StateVector::from([(0, 10)]));
        assert_eq!(iter.next().unwrap(), (0, 0..2));
        assert_eq!(iter.next().unwrap(), (0, 3..5));
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_delete_set_with_missing_state() {
        let mut update = Update {
            delete_set: DeleteSet::from([(0, vec![(3..5), (7..12), (13..15)])]),
            ..Update::default()
        };

        let mut iter = update.delete_set_iter(StateVector::from([(0, 10)]));
        // 7..12 is split at the local state (10); the rest becomes pending
        assert_eq!(iter.next().unwrap(), (0, 3..5));
        assert_eq!(iter.next().unwrap(), (0, 7..10));
        assert_eq!(iter.next(), None);
        assert!(!update.pending_delete_set.is_empty());
        assert_eq!(
            update.pending_delete_set.get(&0).unwrap(),
            &OrderRange::from(vec![(10..12), (13..15)])
        );
    }

    #[test]
    fn should_add_skip_when_clock_not_continuous() {
        loom_model!({
            let update = Update {
                structs: ClientMap::from_iter([(
                    0,
                    VecDeque::from([
                        struct_item((0, 0), 1),
                        struct_item((0, 1), 1),
                        struct_item((0, 10), 1),
                        Node::new_gc((0, 20).into(), 10),
                    ]),
                )]),
                ..Default::default()
            };

            let merged = Update::merge([update]);

            assert_eq!(
                merged.structs.get(&0).unwrap(),
                &VecDeque::from([
                    struct_item((0, 0), 1),
                    struct_item((0, 1), 1),
                    Node::new_skip((0, 2).into(), 8),
                    struct_item((0, 10), 1),
                    Node::new_skip((0, 11).into(), 9),
                    Node::new_gc((0, 20).into(), 10),
                ])
            );
        });
    }

    #[test]
    fn merged_update_should_not_be_released_in_next_turn() {
        loom_model!({
            let update = Update {
                structs: ClientMap::from_iter([(
                    0,
                    VecDeque::from([
                        struct_item((0, 0), 1),
                        struct_item((0, 1), 1),
                        struct_item((0, 10), 1),
                        Node::new_gc((0, 20).into(), 10),
                    ]),
                )]),
                ..Default::default()
            };
            let merged = Update::merge([update]);

            let update2 = Update {
                structs: ClientMap::from_iter([(
                    0,
                    VecDeque::from([struct_item((0, 30), 1), Node::new_gc((0, 32).into(), 1)]),
                )]),
                ..Default::default()
            };
            let merged2 = Update::merge([update2, merged]);

            assert_eq!(merged2.structs.get(&0).unwrap().len(), 9);
        });
    }
}

View File

@@ -0,0 +1,102 @@
use super::*;
/// Fluent, consuming builder for [Item].
pub(crate) struct ItemBuilder {
    item: Item,
}
#[allow(dead_code)]
impl ItemBuilder {
pub fn new() -> ItemBuilder {
Self {
item: Item::default(),
}
}
pub fn id(mut self, id: Id) -> ItemBuilder {
self.item.id = id;
self
}
pub fn left(mut self, left: Somr<Item>) -> ItemBuilder {
if let Some(l) = left.get() {
self.item.origin_left_id = Some(l.last_id());
self.item.left = left;
}
self
}
pub fn right(mut self, right: Somr<Item>) -> ItemBuilder {
if let Some(r) = right.get() {
self.item.origin_right_id = Some(r.id);
self.item.right = right;
}
self
}
pub fn left_id(mut self, left_id: Option<Id>) -> ItemBuilder {
self.item.origin_left_id = left_id;
self
}
pub fn right_id(mut self, right_id: Option<Id>) -> ItemBuilder {
self.item.origin_right_id = right_id;
self
}
pub fn parent(mut self, parent: Option<Parent>) -> ItemBuilder {
self.item.parent = parent;
self
}
#[allow(dead_code)]
pub fn parent_sub(mut self, parent_sub: Option<SmolStr>) -> ItemBuilder {
self.item.parent_sub = parent_sub;
self
}
pub fn content(mut self, content: Content) -> ItemBuilder {
self.item.content = content;
self
}
pub fn flags(mut self, flags: ItemFlag) -> ItemBuilder {
self.item.flags = flags;
self
}
pub fn build(self) -> Item {
if self.item.content.countable() {
self.item.flags.set(item_flags::ITEM_COUNTABLE);
}
self.item
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_item_builder() {
        loom_model!({
            // every builder setter should be wired through to the built item
            let item = ItemBuilder::new()
                .id(Id::new(0, 1))
                .left_id(Some(Id::new(2, 3)))
                .right_id(Some(Id::new(4, 5)))
                .parent(Some(Parent::String("test".into())))
                .content(Content::Any(vec![Any::String("Hello".into())]))
                .build();

            assert_eq!(item.id, Id::new(0, 1));
            assert_eq!(item.origin_left_id, Some(Id::new(2, 3)));
            assert_eq!(item.origin_right_id, Some(Id::new(4, 5)));
            assert!(matches!(item.parent, Some(Parent::String(text)) if text == "test"));
            assert_eq!(item.parent_sub, None);
            assert_eq!(
                item.content,
                Content::Any(vec![Any::String("Hello".into())])
            );
        });
    }
}

View File

@@ -0,0 +1,5 @@
mod items;
pub(crate) use items::*;
use super::*;

View File

@@ -0,0 +1,9 @@
mod range;
mod somr;
mod state;
pub use range::*;
pub use somr::*;
pub use state::*;
use super::*;

View File

@@ -0,0 +1,481 @@
use std::{collections::VecDeque, mem, ops::Range};
/// An ordered collection of non-overlapping `u64` ranges.
///
/// Stored either as one single range or as a fragment list of ranges.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum OrderRange {
    Range(Range<u64>),
    Fragment(VecDeque<Range<u64>>),
}

impl Default for OrderRange {
    /// The default value is the empty range `0..0`.
    fn default() -> Self {
        OrderRange::Range(0..0)
    }
}

impl From<Range<u64>> for OrderRange {
    fn from(range: Range<u64>) -> Self {
        OrderRange::Range(range)
    }
}

impl From<Vec<Range<u64>>> for OrderRange {
    fn from(value: Vec<Range<u64>>) -> Self {
        OrderRange::Fragment(VecDeque::from(value))
    }
}

impl From<VecDeque<Range<u64>>> for OrderRange {
    fn from(value: VecDeque<Range<u64>>) -> Self {
        OrderRange::Fragment(value)
    }
}

/// Whether the two ranges overlap or touch (adjacent endpoints count).
#[inline]
fn is_continuous_range(lhs: &Range<u64>, rhs: &Range<u64>) -> bool {
    !(lhs.end < rhs.start || rhs.end < lhs.start)
}
impl OrderRange {
    /// Number of stored ranges (a single `Range` counts as 1 even if empty).
    pub fn ranges_len(&self) -> usize {
        match self {
            OrderRange::Range(_) => 1,
            OrderRange::Fragment(ranges) => ranges.len(),
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            OrderRange::Range(range) => range.is_empty(),
            OrderRange::Fragment(vec) => vec.is_empty(),
        }
    }

    /// Whether `clock` falls inside any stored range.
    pub fn contains(&self, clock: u64) -> bool {
        match self {
            OrderRange::Range(range) => range.contains(&clock),
            OrderRange::Fragment(ranges) => ranges.iter().any(|r| r.contains(&clock)),
        }
    }

    /// Check that every range in `old_vec` lies entirely inside some range of
    /// `new_vec` (both slices sorted ascending).
    fn check_range_covered(old_vec: &[Range<u64>], new_vec: &[Range<u64>]) -> bool {
        let mut old_iter = old_vec.iter();
        let mut next_old = old_iter.next();
        let mut new_iter = new_vec.iter().peekable();
        let mut next_new = new_iter.next();
        'new_loop: while let Some(new_range) = next_new {
            while let Some(old_range) = next_old {
                if old_range.start < new_range.start || old_range.end > new_range.end {
                    // old range sticks out of this new range: try the next
                    // new range, or fail if there is none left
                    if new_iter.peek().is_some() {
                        next_new = new_iter.next();
                        continue 'new_loop;
                    } else {
                        return false;
                    }
                }
                next_old = old_iter.next();
                if let Some(next_old) = &next_old {
                    if next_old.start > new_range.end {
                        continue;
                    }
                }
            }
            next_new = new_iter.next();
        }
        true
    }

    /// diff_range returns the difference between the old range and the new
    /// range. current range must be covered by the new range
    pub fn diff_range(&self, new_range: &OrderRange) -> Vec<Range<u64>> {
        let old_vec = self.clone().into_iter().collect::<Vec<_>>();
        let new_vec = new_range.clone().into_iter().collect::<Vec<_>>();

        // not covered: nothing sensible to diff, report nothing
        if !Self::check_range_covered(&old_vec, &new_vec) {
            return Vec::new();
        }

        let mut diffs = Vec::new();
        let mut old_idx = 0;

        for new_range in &new_vec {
            // collect the old ranges starting before this new range ends
            let mut overlap_ranges = Vec::new();
            while old_idx < old_vec.len() && old_vec[old_idx].start <= new_range.end {
                overlap_ranges.push(old_vec[old_idx].clone());
                old_idx += 1;
            }

            if overlap_ranges.is_empty() {
                // no old coverage here: the whole new range is a diff
                diffs.push(new_range.clone());
            } else {
                let mut last_end = overlap_ranges[0].start;
                if last_end > new_range.start {
                    diffs.push(new_range.start..last_end);
                }
                // gaps between consecutive overlaps are part of the diff
                for overlap in &overlap_ranges {
                    if overlap.start > last_end {
                        diffs.push(last_end..overlap.start);
                    }
                    last_end = overlap.end;
                }
                if new_range.end > last_end {
                    diffs.push(last_end..new_range.end);
                }
            }
        }
        diffs
    }

    /// Push new range to current one.
    /// Range will be merged if overlap exists or turned into fragment if it's
    /// not continuous.
    pub fn push(&mut self, range: Range<u64>) {
        match self {
            OrderRange::Range(r) => {
                if r.start == r.end {
                    // current range is empty: just take the new one
                    *self = range.into();
                } else if is_continuous_range(r, &range) {
                    r.end = r.end.max(range.end);
                    r.start = r.start.min(range.start);
                } else {
                    // disjoint: fall over to fragment form, kept sorted
                    *self = OrderRange::Fragment(if r.start < range.start {
                        VecDeque::from([r.clone(), range])
                    } else {
                        VecDeque::from([range, r.clone()])
                    });
                }
            }
            OrderRange::Fragment(ranges) => {
                if ranges.is_empty() {
                    *self = OrderRange::Range(range);
                } else {
                    OrderRange::push_inner(ranges, range);
                    self.make_single();
                }
            }
        }
    }

    /// Pop the first (lowest) range, or `None` when empty.
    pub fn pop(&mut self) -> Option<Range<u64>> {
        if self.is_empty() {
            None
        } else {
            match self {
                OrderRange::Range(range) => Some(mem::replace(range, 0..0)),
                OrderRange::Fragment(list) => list.pop_front(),
            }
        }
    }

    pub fn merge(&mut self, other: Self) {
        self.extend(&other);
    }

    /// Normalize to the fragment representation (empty range -> empty list).
    fn make_fragment(&mut self) {
        if let OrderRange::Range(range) = self {
            *self = OrderRange::Fragment(if range.is_empty() {
                VecDeque::new()
            } else {
                VecDeque::from([range.clone()])
            });
        }
    }

    /// Collapse a one-element fragment back to the single-range form.
    fn make_single(&mut self) {
        if let OrderRange::Fragment(ranges) = self {
            if ranges.len() == 1 {
                *self = OrderRange::Range(ranges[0].clone());
            }
        }
    }

    /// Merge all available ranges list into one.
    pub fn squash(&mut self) {
        // merge all available ranges
        if let OrderRange::Fragment(ranges) = self {
            if ranges.is_empty() {
                *self = OrderRange::Range(0..0);
                return;
            }

            let mut changed = false;
            let mut merged = VecDeque::with_capacity(ranges.len());
            let mut cur = ranges[0].clone();
            for next in ranges.iter().skip(1) {
                if is_continuous_range(&cur, next) {
                    cur.start = cur.start.min(next.start);
                    cur.end = cur.end.max(next.end);
                    changed = true;
                } else {
                    merged.push_back(cur);
                    cur = next.clone();
                }
            }
            merged.push_back(cur);

            if merged.len() == 1 {
                *self = OrderRange::Range(merged[0].clone());
            } else if changed {
                // only swap in the merged list when something was coalesced
                mem::swap(ranges, &mut merged);
            }
        }
    }

    /// Insert `range` into a sorted fragment list, merging with any
    /// continuous neighbor found by binary search.
    fn push_inner(list: &mut VecDeque<Range<u64>>, range: Range<u64>) {
        if list.is_empty() {
            list.push_back(range);
        } else {
            // comparator treats any continuous range as "equal" so the
            // search lands on a merge candidate when one exists
            let search_result = list.binary_search_by(|r| {
                if is_continuous_range(r, &range) {
                    std::cmp::Ordering::Equal
                } else if r.end < range.start {
                    std::cmp::Ordering::Less
                } else {
                    std::cmp::Ordering::Greater
                }
            });
            match search_result {
                Ok(idx) => {
                    // continuous with an existing range: widen it in place
                    let old = &mut list[idx];
                    list[idx] = old.start.min(range.start)..old.end.max(range.end);
                    Self::squash_around(list, idx);
                }
                Err(idx) => {
                    list.insert(idx, range);
                    Self::squash_around(list, idx);
                }
            }
        }
    }

    /// After touching `list[idx]`, merge it with its immediate neighbors if
    /// they became continuous.
    fn squash_around(list: &mut VecDeque<Range<u64>>, idx: usize) {
        if idx > 0 {
            let prev = &list[idx - 1];
            let cur = &list[idx];
            if is_continuous_range(prev, cur) {
                list[idx - 1] = prev.start.min(cur.start)..prev.end.max(cur.end);
                list.remove(idx);
            }
        }

        if idx < list.len() - 1 {
            let next = &list[idx + 1];
            let cur = &list[idx];
            if is_continuous_range(cur, next) {
                list[idx] = cur.start.min(next.start)..cur.end.max(next.end);
                list.remove(idx + 1);
            }
        }
    }
}
impl<'a> IntoIterator for &'a OrderRange {
    type Item = Range<u64>;
    type IntoIter = OrderRangeIter<'a>;

    /// Iterate the stored ranges (cloned) without consuming the collection.
    fn into_iter(self) -> Self::IntoIter {
        OrderRangeIter {
            range: self,
            idx: 0,
        }
    }
}

impl Extend<Range<u64>> for OrderRange {
    /// Insert every range from `other`, merging continuous neighbors; the
    /// representation is normalized back to `Range` when only one remains.
    fn extend<T: IntoIterator<Item = Range<u64>>>(&mut self, other: T) {
        // always insert on the fragment form
        self.make_fragment();
        match self {
            OrderRange::Fragment(ranges) => {
                for range in other {
                    OrderRange::push_inner(ranges, range);
                }
                self.make_single();
            }
            // make_fragment above guarantees the fragment form
            _ => unreachable!(),
        }
    }
}
/// Borrowing iterator over the ranges of an [OrderRange].
pub struct OrderRangeIter<'a> {
    range: &'a OrderRange,
    idx: usize,
}

impl Iterator for OrderRangeIter<'_> {
    type Item = Range<u64>;

    fn next(&mut self) -> Option<Self::Item> {
        let next = match self.range {
            // the single-range form yields exactly one item
            OrderRange::Range(range) => (self.idx == 0).then(|| range.clone()),
            OrderRange::Fragment(ranges) => ranges.get(self.idx).cloned(),
        };
        if next.is_some() {
            self.idx += 1;
        }
        next
    }
}
#[cfg(test)]
#[allow(clippy::single_range_in_vec_init)]
mod tests {
    use super::OrderRange;

    #[test]
    fn test_range_push() {
        let mut range: OrderRange = (0..10).into();

        // overlapping push widens the single range
        range.push(5..15);
        assert_eq!(range, OrderRange::Range(0..15));

        // turn to fragment
        range.push(20..30);
        assert_eq!(range, OrderRange::from(vec![(0..15), (20..30)]));

        // auto merge
        range.push(15..16);
        assert_eq!(range, OrderRange::from(vec![(0..16), (20..30)]));

        // squash
        range.push(16..20);
        assert_eq!(range, OrderRange::Range(0..30));
    }

    #[test]
    fn test_range_pop() {
        let mut range: OrderRange = vec![(0..10), (20..30)].into();
        assert_eq!(range.pop(), Some(0..10));

        let mut range: OrderRange = (0..10).into();
        assert_eq!(range.pop(), Some(0..10));
        // popping the single range leaves the empty 0..0
        assert!(range.is_empty());
        assert_eq!(range.pop(), None);
    }

    #[test]
    fn test_ranges_squash() {
        let mut range = OrderRange::from(vec![(0..10), (20..30)]);

        // do nothing
        range.squash();
        assert_eq!(range, OrderRange::from(vec![(0..10), (20..30)]));

        // merged into list
        range = OrderRange::from(vec![(0..10), (10..20), (30..40)]);
        range.squash();
        assert_eq!(range, OrderRange::from(vec![(0..20), (30..40)]));

        // turn to range
        range = OrderRange::from(vec![(0..10), (10..20), (20..30)]);
        range.squash();
        assert_eq!(range, OrderRange::Range(0..30));
    }

    #[test]
    fn test_range_covered() {
        assert!(!OrderRange::check_range_covered(&[0..1], &[2..3]));
        assert!(OrderRange::check_range_covered(&[0..1], &[0..3]));
        assert!(!OrderRange::check_range_covered(&[0..1], &[1..3]));
        assert!(OrderRange::check_range_covered(&[0..1], &[0..3]));
        assert!(OrderRange::check_range_covered(&[1..2], &[0..3]));
        assert!(OrderRange::check_range_covered(&[1..2, 2..3], &[0..3]));
        assert!(!OrderRange::check_range_covered(
            &[1..2, 2..3, 3..4],
            &[0..3]
        ));
        assert!(OrderRange::check_range_covered(
            &[0..1, 2..3],
            &[0..2, 2..4]
        ));
        assert!(OrderRange::check_range_covered(
            &[0..1, 2..3, 3..4],
            &[0..2, 2..4]
        ),);
    }

    #[test]
    fn test_range_diff() {
        {
            // not covered: empty diff
            let old = OrderRange::Range(0..1);
            let new = OrderRange::Range(2..3);
            let ranges = old.diff_range(&new);
            assert_eq!(ranges, vec![]);
        }
        {
            let old = OrderRange::Range(0..10);
            let new = OrderRange::Range(0..11);
            let ranges = old.diff_range(&new);
            assert_eq!(ranges, vec![(10..11)]);
        }
        {
            let old: OrderRange = vec![(0..10), (20..30)].into();
            let new: OrderRange = vec![(0..15), (20..30)].into();
            let ranges = old.diff_range(&new);
            assert_eq!(ranges, vec![(10..15)]);
        }
        {
            let old: OrderRange = vec![(0..3), (5..7), (8..10), (16..18), (21..23)].into();
            let new: OrderRange = vec![(0..12), (15..23)].into();
            let ranges = old.diff_range(&new);
            assert_eq!(ranges, vec![(3..5), (7..8), (10..12), (15..16), (18..21)]);
        }
        {
            let old: OrderRange = vec![(1..6), (8..12)].into();
            let new: OrderRange = vec![(0..12), (15..23), (24..28)].into();
            let ranges = old.diff_range(&new);
            assert_eq!(ranges, vec![(0..1), (6..8), (15..23), (24..28)]);
        }
    }

    #[test]
    fn test_range_extend() {
        let mut range: OrderRange = (0..10).into();
        range.merge((20..30).into());
        assert_eq!(range, OrderRange::from(vec![(0..10), (20..30)]));

        let mut range: OrderRange = (0..10).into();
        range.merge(vec![(10..15), (20..30)].into());
        assert_eq!(range, OrderRange::from(vec![(0..15), (20..30)]));

        let mut range: OrderRange = vec![(0..10), (20..30)].into();
        range.merge((10..20).into());
        assert_eq!(range, OrderRange::Range(0..30));

        let mut range: OrderRange = vec![(0..10), (20..30)].into();
        range.merge(vec![(10..20), (30..40)].into());
        assert_eq!(range, OrderRange::Range(0..40));
    }

    #[test]
    fn iter() {
        let range: OrderRange = vec![(0..10), (20..30)].into();
        assert_eq!(
            range.into_iter().collect::<Vec<_>>(),
            vec![(0..10), (20..30)]
        );

        let range: OrderRange = OrderRange::Range(0..10);
        assert_eq!(range.into_iter().collect::<Vec<_>>(), vec![(0..10)]);
    }
}

View File

@@ -0,0 +1,525 @@
use std::{
cell::UnsafeCell,
fmt::{self, Write},
hash::{Hash, Hasher},
marker::PhantomData,
mem,
ops::{Deref, DerefMut},
ptr::NonNull,
};
use crate::sync::Ordering;
/// Sentinel address used by [Somr::none] to mark an empty, dangling handle.
const DANGLING_PTR: usize = usize::MAX;

/// Whether `ptr` is the dangling sentinel rather than a real allocation.
#[inline]
fn is_dangling<T>(ptr: NonNull<T>) -> bool {
    let addr = ptr.as_ptr() as usize;
    addr == DANGLING_PTR
}
/// Heap data with single owner but multiple refs with dangling checking at
/// runtime.
pub(crate) enum Somr<T> {
    /// The unique owner of the allocation; dropping it drops the payload.
    Owned(Owned<T>),
    /// A non-owning reference; observes "none" after the owner drops the data.
    Ref(Ref<T>),
}

/// Owning handle — exactly one per allocation.
#[repr(transparent)]
pub(crate) struct Owned<T>(NonNull<SomrInner<T>>);

/// Non-owning handle sharing the refcounted inner allocation.
#[repr(transparent)]
pub(crate) struct Ref<T>(NonNull<SomrInner<T>>);

// Refcount width is configurable: the `large_refs` feature widens it to u32.
#[cfg(feature = "large_refs")]
type RefAtomicType = crate::sync::AtomicU32;
#[cfg(feature = "large_refs")]
type RefPrimitiveType = u32;

#[cfg(not(feature = "large_refs"))]
type RefAtomicType = crate::sync::AtomicU16;
#[cfg(not(feature = "large_refs"))]
type RefPrimitiveType = u16;

/// Shared heap payload behind both [Owned] and [Ref] handles.
pub(crate) struct SomrInner<T> {
    /// `None` once the owner dropped the value; refs then read as "none".
    data: Option<UnsafeCell<T>>,
    /// increase the width when we really meet a scenario with more refs
    /// than u16::MAX(65535)
    refs: RefAtomicType,
    _marker: PhantomData<Option<T>>,
}

/// Mutable borrow of the inner value handed out by [SomrInner::data_mut].
pub(crate) struct InnerRefMut<'a, T> {
    inner: NonNull<T>,
    _marker: PhantomData<&'a mut T>,
}
impl<T> Deref for InnerRefMut<'_, T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        // SAFETY: `inner` was built from a live `UnsafeCell` in
        // `SomrInner::data_mut`; the `'a` marker ties it to that borrow
        unsafe { &*self.inner.as_ptr() }
    }
}

impl<T> DerefMut for InnerRefMut<'_, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: same provenance as `deref`; `&mut self` gives exclusivity
        unsafe { &mut *self.inner.as_ptr() }
    }
}

// SAFETY(review): mirrors the payload's Send/Sync markers. Note the inner
// `UnsafeCell` still permits mutation through shared handles (see
// `get_mut_unchecked`), so soundness relies on callers upholding the Somr
// usage contract — confirm against the crate's concurrency model.
unsafe impl<T: Send> Send for Somr<T> {}
unsafe impl<T: Sync> Sync for Somr<T> {}

impl<T> Default for Somr<T> {
    /// The default is the dangling "none" handle, not a fresh allocation.
    fn default() -> Self {
        Self::none()
    }
}
impl<T> Somr<T> {
    /// Allocate `data` on the heap and return the owning handle (refs = 1).
    pub fn new(data: T) -> Self {
        let inner = Box::new(SomrInner {
            data: Some(UnsafeCell::new(data)),
            refs: RefAtomicType::new(1),
            _marker: PhantomData,
        });
        // leak the box: lifetime is managed manually by Owned/Ref drops
        Self::Owned(Owned(Box::leak(inner).into()))
    }

    /// A dangling handle carrying no allocation at all.
    pub fn none() -> Self {
        Self::Ref(Ref(NonNull::new(DANGLING_PTR as *mut _).unwrap()))
    }
}

impl<T> SomrInner<T> {
    /// Shared view of the payload; `None` after the owner dropped it.
    fn data_ref(&self) -> Option<&T> {
        self.data.as_ref().map(|x| unsafe { &*x.get() })
    }

    /// Mutable view of the payload through the `UnsafeCell`.
    fn data_mut(&self) -> Option<InnerRefMut<T>> {
        self.data.as_ref().map(|x| InnerRefMut {
            // SAFETY: `UnsafeCell::get` never returns null for a live cell
            inner: unsafe { NonNull::new_unchecked(x.get()) },
            _marker: PhantomData,
        })
    }
}
impl<T> Somr<T> {
    /// True when dangling or when the owner already dropped the payload.
    #[inline]
    pub fn is_none(&self) -> bool {
        self.dangling() || self.inner().data_ref().is_none()
    }

    /// True when a live payload is reachable through this handle.
    #[inline]
    pub fn is_some(&self) -> bool {
        !self.dangling() && self.inner().data_ref().is_some()
    }

    /// Borrow the payload, or `None` when dangling/dropped.
    pub fn get(&self) -> Option<&T> {
        if self.dangling() {
            return None;
        }

        self.inner().data_ref()
    }

    /// Borrow the payload, panicking when dangling or already dropped.
    pub unsafe fn get_unchecked(&self) -> &T {
        if self.dangling() {
            panic!("Try to visit Somr data that has already been dropped.")
        }

        match &self.inner().data_ref() {
            Some(data) => data,
            None => {
                panic!("Try to unwrap on None")
            }
        }
    }

    /// Exclusive borrow; only available through the owning handle.
    #[allow(unused)]
    pub fn get_mut(&mut self) -> Option<&mut T> {
        if !self.is_owned() || self.dangling() {
            return None;
        }

        let inner = self.inner_mut();
        inner.data.as_mut().map(|x| x.get_mut())
    }

    /// Mutable access through a shared owning handle (no aliasing checks).
    #[allow(unused)]
    pub unsafe fn get_mut_from_ref(&self) -> Option<InnerRefMut<T>> {
        if !self.is_owned() || self.dangling() {
            return None;
        }

        let inner = self.inner_mut();
        inner.data_mut()
    }

    /// Mutable access through any handle; panics when dangling or dropped.
    pub unsafe fn get_mut_unchecked(&self) -> InnerRefMut<'_, T> {
        if self.dangling() {
            panic!("Try to visit Somr data that has already been dropped.")
        }

        match self.inner_mut().data_mut() {
            Some(data) => data,
            None => {
                panic!("Try to unwrap on None")
            }
        }
    }

    #[inline]
    pub fn is_owned(&self) -> bool {
        matches!(self, Self::Owned(_))
    }

    /// Swap ownership out of `self`, leaving a non-owning clone in place.
    pub fn swap_take(&mut self) -> Self {
        debug_assert!(self.is_owned());
        let mut r = self.clone();
        mem::swap(self, &mut r);
        r
    }

    #[inline]
    fn inner(&self) -> &SomrInner<T> {
        debug_assert!(!self.dangling());
        unsafe { self.ptr().as_ref() }
    }

    #[inline]
    #[allow(clippy::mut_from_ref)]
    fn inner_mut(&self) -> &mut SomrInner<T> {
        debug_assert!(!self.dangling());
        unsafe { self.ptr().as_mut() }
    }

    /// Raw pointer to the shared inner allocation (or the dangling sentinel).
    #[inline]
    pub fn ptr(&self) -> NonNull<SomrInner<T>> {
        match self {
            Somr::Owned(ptr) => ptr.0,
            Somr::Ref(ptr) => ptr.0,
        }
    }

    /// Identity comparison: do both handles share the same allocation?
    #[inline]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        self.ptr().as_ptr() as usize == other.ptr().as_ptr() as usize
    }

    #[inline]
    fn dangling(&self) -> bool {
        is_dangling(self.ptr())
    }
}
impl<T> Clone for Somr<T> {
    /// Cloning never copies `T`: it bumps the refcount and returns another
    /// non-owning [Somr::Ref] handle.
    fn clone(&self) -> Self {
        if self.dangling() {
            return Self::none();
        }

        let inner = unsafe { &*self.ptr().as_ptr() };
        let old_size = inner.refs.fetch_add(1, Ordering::Relaxed);
        // guard against refcount overflow at the configured width
        if old_size == RefPrimitiveType::MAX {
            panic!("Too many refs on Somr, maybe we need to increase the limitation now.")
        }
        Self::Ref(Ref(self.ptr()))
    }
}
impl<T> Drop for Owned<T> {
    fn drop(&mut self) {
        let inner = unsafe { &mut *self.0.as_ptr() };
        // ensure all reads are finished
        // See [Arc::Drop]
        inner.refs.load(Ordering::Acquire);
        // drop the payload now; surviving refs will observe `None`
        inner.data.take();
        // the owner holds one refcount itself — release it like a Ref,
        // which also frees the allocation if this was the last handle
        drop(Ref(self.0));
    }
}

impl<T> Drop for Ref<T> {
    fn drop(&mut self) {
        // the "none" sentinel owns nothing
        if is_dangling(self.0) {
            return;
        }
        let rc = unsafe { &(*self.0.as_ptr()).refs };
        // no other refs
        if rc.fetch_sub(1, Ordering::Release) == 1 {
            // ensure all reads are finished
            // See [Arc::Drop]
            rc.load(Ordering::Acquire);
            // reclaim the allocation leaked in [Somr::new]
            drop(unsafe { Box::from_raw(self.0.as_ptr()) });
        }
    }
}
impl<T> From<T> for Somr<T> {
fn from(value: T) -> Self {
Somr::new(value)
}
}
impl<T> From<Option<Somr<T>>> for Somr<T> {
fn from(value: Option<Somr<T>>) -> Self {
match value {
Some(somr) => somr,
None => Somr::none(),
}
}
}
/// Convenience accessor for `Option<Somr<T>>`: reach through both the
/// `Option` and the handle with a single call.
pub trait FlattenGet<T> {
    #[allow(dead_code)]
    fn flatten_get(&self) -> Option<&T>;
}
impl<T> FlattenGet<T> for Option<Somr<T>> {
    /// `None` (outer or inner/dangling) collapses to `None`.
    fn flatten_get(&self) -> Option<&T> {
        match self {
            Some(handle) => handle.get(),
            None => None,
        }
    }
}
impl<T: PartialEq> PartialEq for Somr<T> {
    /// Identity first (same allocation), then deep value equality when both
    /// handles are live. Two distinct dangling handles compare equal via the
    /// shared sentinel pointer.
    fn eq(&self, other: &Self) -> bool {
        self.ptr() == other.ptr()
            || !self.dangling() && !other.dangling() && self.inner() == other.inner()
    }
}
impl<T: PartialEq> PartialEq for SomrInner<T> {
    /// Inner cells compare by their payload only; ref-counts are ignored.
    fn eq(&self, other: &Self) -> bool {
        self.data_ref() == other.data_ref()
    }
}
impl<T: PartialEq> Eq for Somr<T> {
    // NOTE(review): overriding this hidden method lets `Somr` claim `Eq`
    // with only `T: PartialEq`. Combined with the pointer-based `Hash`
    // below, value-equal handles may hash differently — confirm no code
    // depends on the full `Hash`/`Eq` consistency contract (e.g. using
    // `Somr` as a `HashMap` key with value semantics).
    fn assert_receiver_is_total_eq(&self) {}
}
impl<T: PartialOrd> PartialOrd for Somr<T> {
    /// Comparisons involving a dangling/none handle are undefined (`None`).
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.get()
            .zip(other.get())
            .and_then(|(lhs, rhs)| lhs.partial_cmp(rhs))
    }
}
impl<T> Hash for Somr<T> {
    /// Hashes the allocation address (identity). Consistent with `ptr_eq`,
    /// but NOT with the value-based `PartialEq` above — see the note on the
    /// `Eq` impl.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.ptr().hash(state)
    }
}
impl<T: fmt::Debug> fmt::Debug for Somr<T> {
    /// Renders as `Owned(..)` / `Ref(..)`, printing `None` once the owner
    /// has released the data.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(if self.is_owned() { "Owned(" } else { "Ref(" })?;
        match self.get() {
            Some(value) => fmt::Debug::fmt(value, f)?,
            None => f.write_str("None")?,
        }
        f.write_char(')')
    }
}
impl<T: fmt::Display> fmt::Display for Somr<T> {
    /// Same shape as the `Debug` output, but the payload is rendered with
    /// its `Display` impl.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(if self.is_owned() { "Owned(" } else { "Ref(" })?;
        match self.get() {
            Some(value) => fmt::Display::fmt(value, f)?,
            None => f.write_str("None")?,
        }
        f.write_char(')')
    }
}
impl<T: Sized> fmt::Pointer for Somr<T> {
    /// Formats the address of the inner data.
    ///
    /// A dangling/none handle (or one whose owner already dropped the data)
    /// has no payload to point at; it is rendered as the null address rather
    /// than panicking — `fmt` impls must never panic.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.get() {
            Some(data) => fmt::Pointer::fmt(&(data as *const T), f),
            None => fmt::Pointer::fmt(&std::ptr::null::<T>(), f),
        }
    }
}
#[cfg(all(test, not(loom)))]
/// Proptest support: generate an arbitrary `T` and wrap it via `From<T>`
/// (so every generated handle is `Owned`).
impl<T: proptest::arbitrary::Arbitrary> proptest::arbitrary::Arbitrary for Somr<T> {
    type Parameters = T::Parameters;
    type Strategy = proptest::strategy::MapInto<T::Strategy, Self>;
    fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
        proptest::strategy::Strategy::prop_map_into(proptest::arbitrary::any_with::<T>(args))
    }
}
// Unit tests for the `Somr` owner/reference smart pointer; each body runs
// under `loom_model!` so the loom scheduler can explore interleavings when
// built with `--cfg loom`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::loom_model;
    #[test]
    fn basic_example() {
        loom_model!({
            let five = Somr::new(5);
            assert_eq!(five.get(), Some(&5));
            let five_ref = five.clone();
            assert!(!five_ref.is_owned());
            assert_eq!(five_ref.get(), Some(&5));
            assert_eq!(
                five_ref.ptr().as_ptr() as usize,
                five.ptr().as_ptr() as usize
            );
            drop(five);
            // owner released
            assert_eq!(five_ref.get(), None);
        });
    }
    #[test]
    fn complex_struct() {
        loom_model!({
            struct T {
                a: usize,
                b: String,
            }
            let t1 = Somr::new(T {
                a: 1,
                b: "hello".to_owned(),
            });
            assert_eq!(t1.get().unwrap().a, 1);
            assert_eq!(t1.get().unwrap().b.as_str(), "hello");
            let t2 = t1.clone();
            assert!(!t2.is_owned());
            assert_eq!(t2.ptr().as_ptr() as usize, t1.ptr().as_ptr() as usize);
            assert_eq!(t2.get().unwrap().a, 1);
            assert_eq!(t2.get().unwrap().b.as_str(), "hello");
            drop(t1);
            assert!(t2.get().is_none());
        });
    }
    #[test]
    fn acquire_mut_ref() {
        loom_model!({
            let mut five = Somr::new(5);
            *five.get_mut().unwrap() += 1;
            assert_eq!(five.get(), Some(&6));
            let five_ref = five.clone();
            // only owner can mut ref
            assert!(five_ref.get().is_some());
            assert!(unsafe { five_ref.get_mut_from_ref() }.is_none());
            drop(five);
        });
    }
    #[test]
    fn comparison() {
        loom_model!({
            let five = Somr::new(5);
            let five_ref = five.clone();
            let another_five = Somr::new(5);
            let six = Somr::new(6);
            // value equality across distinct allocations, identity via ptr
            assert_eq!(five, five_ref);
            assert_eq!(five, another_five);
            assert_eq!(five.ptr().as_ptr(), five_ref.ptr().as_ptr());
            assert_ne!(five.ptr().as_ptr(), another_five.ptr().as_ptr());
            assert!(six > five);
            assert!(six > five_ref);
            assert_eq!(five_ref.partial_cmp(&six), Some(std::cmp::Ordering::Less));
            drop(five);
            // dropping the owner makes comparisons undefined (None)
            assert_eq!(five_ref.partial_cmp(&six), None);
        });
    }
    #[test]
    fn represent_none() {
        loom_model!({
            let none = Somr::<u32>::none();
            assert!(!none.is_owned());
            assert!(none.is_none());
            assert!(none.get().is_none());
        });
    }
    #[test]
    fn drop_ref_without_affecting_owner() {
        loom_model!({
            let five = Somr::new(5);
            let five_ref = five.clone();
            assert_eq!(five.get().unwrap(), &5);
            assert_eq!(five_ref.get().unwrap(), &5);
            drop(five_ref);
            assert_eq!(five.get().unwrap(), &5);
        });
    }
    #[test]
    fn swap_take() {
        loom_model!({
            let mut five = Somr::new(5);
            let owned = five.swap_take();
            assert_eq!(owned.get().unwrap(), &5);
            assert_eq!(five.get().unwrap(), &5);
            assert!(owned.is_owned());
            assert!(!five.is_owned());
        });
    }
    // This is UB if we didn't use `UnsafeCell` in `Somr`
    #[test]
    fn test_inner_mut() {
        loom_model!({
            let five = Somr::new(5);
            fn add(a: &Somr<i32>, b: &Somr<i32>) {
                unsafe { a.get_mut_from_ref() }
                    .map(|mut x| *x += *b.get().unwrap())
                    .unwrap();
            }
            // aliasing read+write through the same handle
            add(&five, &five);
            assert_eq!(five.get().copied().unwrap(), 10);
        });
    }
}

View File

@@ -0,0 +1,140 @@
use std::ops::{Deref, DerefMut};
use super::{
Client, ClientMap, Clock, CrdtRead, CrdtReader, CrdtWrite, CrdtWriter, HashMapExt, Id,
JwstCodecResult, HASHMAP_SAFE_CAPACITY,
};
/// Per-client logical clock map: the clock known for each client id.
#[derive(Default, Debug, PartialEq, Clone)]
pub struct StateVector(ClientMap<Clock>);
impl StateVector {
    /// Raise the clock for `client` to `clock` if it is higher; insert the
    /// clock when the client is unknown.
    pub fn set_max(&mut self, client: Client, clock: Clock) {
        self.entry(client)
            .and_modify(|m_clock| {
                if *m_clock < clock {
                    *m_clock = clock;
                }
            })
            .or_insert(clock);
    }
    /// Clock recorded for `client`, defaulting to 0 for unknown clients.
    pub fn get(&self, client: &Client) -> Clock {
        *self.0.get(client).unwrap_or(&0)
    }
    /// Whether `id` is covered by this state vector (its clock has been
    /// observed for its client).
    pub fn contains(&self, id: &Id) -> bool {
        id.clock <= self.get(&id.client)
    }
    /// Lower the clock for `client` to `clock` if it is lower; insert the
    /// clock when the client is unknown.
    pub fn set_min(&mut self, client: Client, clock: Clock) {
        self.entry(client)
            .and_modify(|m_clock| {
                if *m_clock > clock {
                    *m_clock = clock;
                }
            })
            .or_insert(clock);
    }
    /// Iterate all `(client, clock)` pairs (arbitrary order).
    pub fn iter(&self) -> impl Iterator<Item = (&Client, &Clock)> {
        self.0.iter()
    }
    /// Fold `other` in, keeping the *minimum* clock for clients present in
    /// both, and adopting `other`'s clock for clients unknown to `self`.
    ///
    /// NOTE(review): min-merge is pinned by `test_state_vector_merge`, but
    /// differs from the usual max-merge of CRDT state vectors — confirm the
    /// intended semantics at call sites.
    pub fn merge_with(&mut self, other: &Self) {
        for (client, clock) in other.iter() {
            self.set_min(*client, *clock);
        }
    }
}
/// Expose the underlying map's read API (`len`, `get`, `entry`, …) directly.
impl Deref for StateVector {
    type Target = ClientMap<Clock>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
/// Mutable counterpart of the `Deref` impl above.
impl DerefMut for StateVector {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
impl<const N: usize> From<[(Client, Clock); N]> for StateVector {
fn from(value: [(Client, Clock); N]) -> Self {
let mut map = ClientMap::with_capacity(N);
for (client, clock) in value {
map.insert(client, clock);
}
Self(map)
}
}
impl<R: CrdtReader> CrdtRead<R> for StateVector {
    /// Decode a length-prefixed list of `(client, clock)` var-u64 pairs.
    fn read(decoder: &mut R) -> JwstCodecResult<Self> {
        let len = decoder.read_var_u64()? as usize;
        // See: [HASHMAP_SAFE_CAPACITY]
        // `len` comes from untrusted input: cap the pre-allocation so a
        // forged huge length cannot trigger an OOM before decoding fails.
        let mut map = ClientMap::with_capacity(len.min(HASHMAP_SAFE_CAPACITY));
        for _ in 0..len {
            let client = decoder.read_var_u64()?;
            let clock = decoder.read_var_u64()?;
            map.insert(client, clock);
        }
        map.shrink_to_fit();
        Ok(Self(map))
    }
}
impl<W: CrdtWriter> CrdtWrite<W> for StateVector {
    /// Encode as a length prefix followed by `(client, clock)` var-u64 pairs
    /// (map iteration order, i.e. unspecified pair order).
    fn write(&self, encoder: &mut W) -> JwstCodecResult {
        encoder.write_var_u64(self.len() as u64)?;
        for (client, clock) in self.iter() {
            encoder.write_var_u64(*client)?;
            encoder.write_var_u64(*clock)?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_state_vector_basic() {
        let mut state_vector = StateVector::from([(1, 1), (2, 2), (3, 3)]);
        assert_eq!(state_vector.len(), 3);
        assert_eq!(state_vector.get(&1), 1);
        state_vector.set_min(1, 0);
        assert_eq!(state_vector.get(&1), 0);
        state_vector.set_max(1, 4);
        assert_eq!(state_vector.get(&1), 4);
        // set inexistent client
        state_vector.set_max(4, 1);
        assert_eq!(state_vector.get(&4), 1);
        // same client with larger clock
        assert!(!state_vector.contains(&(1, 5).into()));
    }
    #[test]
    fn test_state_vector_merge() {
        // merge_with keeps the minimum clock per client — the original
        // values survive merging with strictly larger clocks.
        let mut state_vector = StateVector::from([(1, 1), (2, 2), (3, 3)]);
        let other_state_vector = StateVector::from([(1, 5), (2, 6), (3, 7)]);
        state_vector.merge_with(&other_state_vector);
        assert_eq!(state_vector, StateVector::from([(3, 3), (1, 1), (2, 2)]));
    }
}

View File

@@ -0,0 +1,656 @@
use super::{history::StoreHistory, publisher::DocPublisher, store::StoreRef, *};
use crate::sync::{Arc, RwLock};
#[cfg(feature = "debug")]
/// Snapshot of store internals, exposed for debugging only
/// (see [`Doc::store_status`]).
#[derive(Debug, Clone)]
pub struct DocStoreStatus {
    pub nodes: usize,
    pub delete_sets: usize,
    pub types: usize,
    pub dangling_types: usize,
    pub pending_nodes: usize,
}
/// [DocOptions] used to create a new [Doc]
///
/// ```
/// use y_octo::DocOptions;
///
/// let doc = DocOptions::new()
///     .with_client_id(1)
///     .with_guid("guid".into())
///     .auto_gc(true)
///     .build();
///
/// assert_eq!(doc.guid(), "guid")
/// ```
#[derive(Clone, Debug)]
pub struct DocOptions {
    /// Globally-unique identifier of the document.
    pub guid: String,
    /// Client id stamped on every item produced locally.
    pub client_id: u64,
    /// Whether deleted content is garbage-collected automatically.
    pub gc: bool,
}
impl Default for DocOptions {
    /// Deterministic settings under test/bench builds; otherwise a random
    /// guid and a randomly generated (size-biased) client id.
    fn default() -> Self {
        if cfg!(any(test, feature = "bench")) {
            Self {
                client_id: 1,
                guid: "test".into(),
                gc: true,
            }
        } else {
            /// It tends to generate small numbers.
            /// Since the client id will be included in all crdt items, the
            /// small client helps to reduce the binary size.
            ///
            /// NOTE: The probability of 36% of the random number generated by
            /// this function is greater than [u32::MAX]
            fn prefer_small_random() -> u64 {
                use rand::{distr::Distribution, rng};
                use rand_distr::Exp;
                // exponential distribution with mean u16::MAX, scaled back up
                let scale_factor = u16::MAX as f64;
                let v: f64 = Exp::new(1.0 / scale_factor)
                    .map(|exp| exp.sample(&mut rng()))
                    // fall back to a uniform random id if Exp::new fails
                    .unwrap_or_else(|_| rand::random());
                (v * scale_factor) as u64
            }
            Self {
                client_id: prefer_small_random(),
                guid: nanoid::nanoid!(),
                gc: true,
            }
        }
    }
}
impl DocOptions {
    /// Alias for [`DocOptions::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder: pin the client id used for locally produced items.
    pub fn with_client_id(self, client_id: u64) -> Self {
        Self { client_id, ..self }
    }

    /// Builder: set the document guid.
    pub fn with_guid(self, guid: String) -> Self {
        Self { guid, ..self }
    }

    /// Builder: enable or disable automatic garbage collection.
    pub fn auto_gc(self, gc: bool) -> Self {
        Self { gc, ..self }
    }

    /// Finish the builder and construct the [`Doc`].
    pub fn build(self) -> Doc {
        Doc::with_options(self)
    }
}
impl From<DocOptions> for Any {
    /// Serialize options into an `Any::Object`. The `client_id` is
    /// intentionally not round-tripped (each peer picks its own).
    fn from(value: DocOptions) -> Self {
        Any::Object(HashMap::from_iter([
            ("gc".into(), value.gc.into()),
            ("guid".into(), value.guid.into()),
        ]))
    }
}
impl TryFrom<Any> for DocOptions {
    type Error = JwstCodecError;

    /// Rebuild options from an [`Any::Object`]; unknown keys are ignored and
    /// missing keys keep their default values.
    fn try_from(value: Any) -> Result<Self, Self::Error> {
        let Any::Object(map) = value else {
            return Err(JwstCodecError::UnexpectedType("Object"));
        };
        let mut options = DocOptions::default();
        for (key, value) in map {
            match key.as_str() {
                "gc" => options.gc = bool::try_from(value)?,
                "guid" => options.guid = String::try_from(value)?,
                _ => {}
            }
        }
        Ok(options)
    }
}
/// A collaborative document: a cheaply clonable handle over a shared item
/// store plus the publisher that notifies subscribers of changes.
#[derive(Debug, Clone)]
pub struct Doc {
    // cached copy of opts.client_id, used for identity/equality
    client_id: u64,
    opts: DocOptions,
    pub(crate) store: StoreRef,
    pub publisher: Arc<DocPublisher>,
}
// SAFETY: NOTE(review) — `Doc` is shared via `Arc<RwLock<DocStore>>`, but the
// store internally holds raw-pointer-based `Somr` handles. These manual impls
// assert all cross-thread access is serialized through the store lock;
// verify that invariant before relying on it.
unsafe impl Send for Doc {}
unsafe impl Sync for Doc {}
impl Default for Doc {
    /// Equivalent to [`Doc::new`] (default options).
    fn default() -> Self {
        Doc::new()
    }
}
impl PartialEq for Doc {
    /// NOTE(review): equality compares only the client id — two docs with
    /// different content (or even different guids) but the same client id
    /// compare equal. Confirm this identity semantic is what callers expect.
    fn eq(&self, other: &Self) -> bool {
        self.client_id == other.client_id
    }
}
impl Doc {
    /// Create a doc with default [`DocOptions`].
    pub fn new() -> Self {
        Self::with_options(DocOptions::default())
    }
    /// Create a doc with explicit options; also spins up its publisher.
    pub fn with_options(options: DocOptions) -> Self {
        let store = Arc::new(RwLock::new(DocStore::with_client(options.client_id)));
        let publisher = Arc::new(DocPublisher::new(store.clone()));
        Self {
            client_id: options.client_id,
            opts: options,
            store,
            publisher,
        }
    }
    /// Create a doc with default options but a fixed client id.
    pub fn with_client(client_id: u64) -> Self {
        DocOptions::new().with_client_id(client_id).build()
    }
    /// This doc's own client id.
    pub fn client(&self) -> Client {
        self.client_id
    }
    /// All client ids that have contributed items to the store.
    pub fn clients(&self) -> Vec<u64> {
        self.store.read().unwrap().clients()
    }
    /// Build a [`StoreHistory`] view over this doc's store, with its parent
    /// index pre-resolved.
    pub fn history(&self) -> StoreHistory {
        let history = StoreHistory::new(&self.store);
        history.resolve();
        history
    }
    #[cfg(feature = "debug")]
    /// Debug-only snapshot of store internals.
    pub fn store_status(&self) -> DocStoreStatus {
        let store = self.store.read().unwrap();
        DocStoreStatus {
            nodes: store.total_nodes(),
            delete_sets: store.total_delete_sets(),
            types: store.total_types(),
            dangling_types: store.total_dangling_types(),
            pending_nodes: store.total_pending_nodes(),
        }
    }
    /// The options this doc was created with.
    pub fn options(&self) -> &DocOptions {
        &self.opts
    }
    /// The document guid.
    pub fn guid(&self) -> &str {
        self.opts.guid.as_str()
    }
    // TODO:
    // provide a better way instead of `_v1` methods
    // when implementing `v2` binary format
    /// Decode a doc from a y-protocol v1 update binary using default options.
    pub fn try_from_binary_v1<T: AsRef<[u8]>>(binary: T) -> JwstCodecResult<Self> {
        Self::try_from_binary_v1_with_options(binary, DocOptions::default())
    }
    /// Decode a doc from a y-protocol v1 update binary with explicit options.
    pub fn try_from_binary_v1_with_options<T: AsRef<[u8]>>(
        binary: T,
        options: DocOptions,
    ) -> JwstCodecResult<Self> {
        let mut doc = Doc::with_options(options);
        doc.apply_update_from_binary_v1(binary)?;
        Ok(doc)
    }
    /// Decode a v1 update binary and apply it to this doc.
    pub fn apply_update_from_binary_v1<T: AsRef<[u8]>>(&mut self, binary: T) -> JwstCodecResult {
        let mut decoder = RawDecoder::new(binary.as_ref());
        let update = Update::read(&mut decoder)?;
        self.apply_update(update)
    }
    /// Integrate a decoded [`Update`] into the store.
    ///
    /// Runs to a fixpoint: integrating one update may unlock previously
    /// pending structs (whose dependencies were missing), so the loop keeps
    /// merging and re-integrating until nothing new can be applied; whatever
    /// remains unappliable is stashed back into `store.pending`.
    pub fn apply_update(&mut self, mut update: Update) -> JwstCodecResult {
        let mut store = self.store.write().unwrap();
        let mut retry = false;
        loop {
            // integrate every struct that is ready w.r.t. the current state
            for (mut s, offset) in update.iter(store.get_state_vector()) {
                if let Node::Item(item) = &mut s {
                    debug_assert!(item.is_owned());
                    let mut item = unsafe { item.get_mut_unchecked() };
                    store.repair(&mut item, self.store.clone())?;
                }
                store.integrate(s, offset, None)?;
            }
            // apply this update's deletions
            for (client, range) in update.delete_set_iter(store.get_state_vector()) {
                store.delete_range(client, range)?;
            }
            if let Some(mut pending_update) = store.pending.take() {
                if pending_update
                    .missing_state
                    .iter()
                    .any(|(client, clock)| *clock < store.get_state(*client))
                {
                    // new update has been applied to the doc, need to re-integrate
                    retry = true;
                }
                for (client, range) in pending_update.delete_set_iter(store.get_state_vector()) {
                    store.delete_range(client, range)?;
                }
                if update.is_pending_empty() {
                    update = pending_update;
                } else {
                    // drain all pending state to pending update for later iteration
                    update.drain_pending_state();
                    Update::merge_into(&mut update, [pending_update]);
                }
            } else {
                // no pending update at store
                // no pending update in current iteration
                // thank god, all clean
                if update.is_pending_empty() {
                    break;
                } else {
                    // need to turn all pending state into update for later iteration
                    update.drain_pending_state();
                    retry = false;
                };
            }
            // can't integrate any more, save the pending update
            if !retry {
                if !update.is_empty() {
                    store.pending.replace(update);
                }
                break;
            }
        }
        Ok(())
    }
    /// Names of all root-level shared types.
    pub fn keys(&self) -> Vec<String> {
        let store = self.store.read().unwrap();
        store.types.keys().cloned().collect()
    }
    /// Get the named root text, creating it if absent.
    pub fn get_or_create_text<S: AsRef<str>>(&self, name: S) -> JwstCodecResult<Text> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Text)
            .set_name(name.as_ref().to_string())
            .build()
    }
    /// Create an anonymous (non-root) text.
    pub fn create_text(&self) -> JwstCodecResult<Text> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Text)
            .build()
    }
    /// Get the named root array, creating it if absent.
    pub fn get_or_create_array<S: AsRef<str>>(&self, str: S) -> JwstCodecResult<Array> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Array)
            .set_name(str.as_ref().to_string())
            .build()
    }
    /// Create an anonymous (non-root) array.
    pub fn create_array(&self) -> JwstCodecResult<Array> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Array)
            .build()
    }
    /// Get the named root map, creating it if absent.
    pub fn get_or_create_map<S: AsRef<str>>(&self, str: S) -> JwstCodecResult<Map> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Map)
            .set_name(str.as_ref().to_string())
            .build()
    }
    /// Create an anonymous (non-root) map.
    pub fn create_map(&self) -> JwstCodecResult<Map> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Map)
            .build()
    }
    /// Get the named root map; errors if it does not already exist.
    pub fn get_map(&self, str: &str) -> JwstCodecResult<Map> {
        YTypeBuilder::new(self.store.clone())
            .with_kind(YTypeKind::Map)
            .set_name(str.to_string())
            .build_exists()
    }
    /// Encode the whole doc as a v1 update binary.
    pub fn encode_update_v1(&self) -> JwstCodecResult<Vec<u8>> {
        self.encode_state_as_update_v1(&StateVector::default())
    }
    /// Encode the diff against `sv` as a v1 update binary.
    pub fn encode_state_as_update_v1(&self, sv: &StateVector) -> JwstCodecResult<Vec<u8>> {
        let update = self.encode_state_as_update(sv)?;
        let mut encoder = RawEncoder::default();
        update.write(&mut encoder)?;
        Ok(encoder.into_inner())
    }
    /// Encode the whole doc as an in-memory [`Update`].
    pub fn encode_update(&self) -> JwstCodecResult<Update> {
        self.encode_state_as_update(&StateVector::default())
    }
    /// Encode the diff against `sv` as an in-memory [`Update`].
    pub fn encode_state_as_update(&self, sv: &StateVector) -> JwstCodecResult<Update> {
        self.store.read().unwrap().diff_state_vector(sv, true)
    }
    /// Current state vector of the store.
    pub fn get_state_vector(&self) -> StateVector {
        self.store.read().unwrap().get_state_vector()
    }
    /// Register a change callback: receives the encoded update binary and
    /// the parsed history entries for each observed change.
    pub fn subscribe(&self, cb: impl Fn(&[u8], &[History]) + Sync + Send + 'static) {
        self.publisher.subscribe(cb);
    }
    /// Drop every registered subscriber.
    pub fn unsubscribe_all(&self) {
        self.publisher.unsubscribe_all();
    }
    /// Number of registered subscribers.
    pub fn subscribe_count(&self) -> usize {
        self.publisher.count()
    }
    /// Run store optimization/garbage collection now.
    pub fn gc(&self) -> JwstCodecResult<()> {
        self.store.write().unwrap().optimize()
    }
}
// Integration tests for `Doc`, several cross-checked against the reference
// `yrs` implementation for binary compatibility.
#[cfg(test)]
mod tests {
    use yrs::{types::ToJson, updates::decoder::Decode, Array, Map, Options, Transact};
    use super::*;
    use crate::sync::{AtomicU8, Ordering};
    #[test]
    fn test_encode_state_as_update() {
        let yrs_options_left = Options::default();
        let yrs_options_right = Options::default();
        loom_model!({
            // under miri the yrs dependency is skipped; generate both
            // binaries with y-octo itself
            let (binary, binary_new) = if cfg!(miri) {
                let doc = Doc::new();
                let mut map = doc.get_or_create_map("abc").unwrap();
                map.insert("a".to_string(), 1).unwrap();
                let binary = doc.encode_update_v1().unwrap();
                let doc_new = Doc::new();
                let mut array = doc_new.get_or_create_array("array").unwrap();
                array.insert(0, "array_value").unwrap();
                // NOTE(review): this encodes `doc` again — likely intended to
                // be `doc_new.encode_update_v1()`; confirm and fix separately.
                let binary_new = doc.encode_update_v1().unwrap();
                (binary, binary_new)
            } else {
                let yrs_doc = yrs::Doc::with_options(yrs_options_left.clone());
                let map = yrs_doc.get_or_insert_map("abc");
                let mut trx = yrs_doc.transact_mut();
                map.insert(&mut trx, "a", 1);
                let binary = trx.encode_update_v1();
                let yrs_doc_new = yrs::Doc::with_options(yrs_options_right.clone());
                let array = yrs_doc_new.get_or_insert_array("array");
                let mut trx = yrs_doc_new.transact_mut();
                array.insert(&mut trx, 0, "array_value");
                let binary_new = trx.encode_update_v1();
                (binary, binary_new)
            };
            let mut doc = Doc::try_from_binary_v1(binary).unwrap();
            let mut doc_new = Doc::try_from_binary_v1(binary_new).unwrap();
            // exchange diffs both ways; the docs must converge
            let diff_update = doc_new
                .encode_state_as_update_v1(&doc.get_state_vector())
                .unwrap();
            let diff_update_reverse = doc
                .encode_state_as_update_v1(&doc_new.get_state_vector())
                .unwrap();
            doc.apply_update_from_binary_v1(diff_update).unwrap();
            doc_new
                .apply_update_from_binary_v1(diff_update_reverse)
                .unwrap();
            assert_eq!(
                doc.encode_update_v1().unwrap(),
                doc_new.encode_update_v1().unwrap()
            );
        });
    }
    #[test]
    #[cfg_attr(any(miri, loom), ignore)]
    fn test_array_create() {
        let yrs_options = yrs::Options::default();
        let json = serde_json::json!([42.0, -42.0, true, false, "hello", "world", [1.0]]);
        {
            // reference behavior in yrs
            let doc = yrs::Doc::with_options(yrs_options.clone());
            let array = doc.get_or_insert_array("abc");
            let mut trx = doc.transact_mut();
            array.insert(&mut trx, 0, 42);
            array.insert(&mut trx, 1, -42);
            array.insert(&mut trx, 2, true);
            array.insert(&mut trx, 3, false);
            array.insert(&mut trx, 4, "hello");
            array.insert(&mut trx, 5, "world");
            let sub_array = yrs::ArrayPrelim::default();
            let sub_array = array.insert(&mut trx, 6, sub_array);
            sub_array.insert(&mut trx, 0, 1);
            drop(trx);
            let config = assert_json_diff::Config::new(assert_json_diff::CompareMode::Strict)
                .numeric_mode(assert_json_diff::NumericMode::AssumeFloat);
            assert_json_diff::assert_json_matches!(array.to_json(&doc.transact()), json, config);
        };
        // same content built with y-octo, exported as a v1 binary
        let binary = {
            let doc = Doc::new();
            let mut array = doc.get_or_create_array("abc").unwrap();
            array.insert(0, 42).unwrap();
            array.insert(1, -42).unwrap();
            array.insert(2, true).unwrap();
            array.insert(3, false).unwrap();
            array.insert(4, "hello").unwrap();
            array.insert(5, "world").unwrap();
            let mut sub_array = doc.create_array().unwrap();
            array.insert(6, sub_array.clone()).unwrap();
            // FIXME: array need insert first to compatible with yrs
            sub_array.insert(0, 1).unwrap();
            doc.encode_update_v1().unwrap()
        };
        // yrs must be able to decode y-octo's binary
        let ydoc = yrs::Doc::with_options(yrs_options);
        let array = ydoc.get_or_insert_array("abc");
        let mut trx = ydoc.transact_mut();
        trx
            .apply_update(yrs::Update::decode_v1(&binary).unwrap())
            .unwrap();
        let config = assert_json_diff::Config::new(assert_json_diff::CompareMode::Strict)
            .numeric_mode(assert_json_diff::NumericMode::AssumeFloat);
        assert_json_diff::assert_json_matches!(array.to_json(&trx), json, config);
        // and y-octo must round-trip its own binary
        let mut doc = Doc::new();
        let array = doc.get_or_create_array("abc").unwrap();
        doc.apply_update_from_binary_v1(binary).unwrap();
        let list = array.iter().collect::<Vec<_>>();
        assert!(list.len() == 7);
        assert!(matches!(list[6], Value::Array(_)));
    }
    #[test]
    #[ignore = "inaccurate timing on ci, need for more accurate timing testing"]
    fn test_subscribe() {
        loom_model!({
            let doc = Doc::default();
            let doc_clone = doc.clone();
            let count = Arc::new(AtomicU8::new(0));
            let count_clone1 = count.clone();
            let count_clone2 = count.clone();
            doc.subscribe(move |_, _| {
                count_clone1.fetch_add(1, Ordering::SeqCst);
            });
            doc_clone.subscribe(move |_, _| {
                count_clone2.fetch_add(1, Ordering::SeqCst);
            });
            doc_clone
                .get_or_create_array("abc")
                .unwrap()
                .insert(0, 42)
                .unwrap();
            // wait observer, cycle once every 100mm
            std::thread::sleep(std::time::Duration::from_millis(200));
            assert_eq!(count.load(Ordering::SeqCst), 2);
        });
    }
    #[test]
    fn test_repeated_applied_pending_update() {
        // generate a pending update
        // update: [1, 1, 1, 0, 39, 1, 4, 116, 101, 115, 116, 3, 109, 97, 112, 1, 0]
        // update: [1, 1, 1, 1, 40, 0, 1, 0, 11, 115, 117, 98, 95, 109, 97, 112, 95,
        // 107, 101, 121, 1, 119, 13, 115, 117, 98, 95, 109, 97, 112, 95, 118, 97, 108,
        // 117, 101, 0]
        // {
        //     let doc1 = Doc::default();
        //     doc1.subscribe(|update| {
        //         println!("update: {:?}", update);
        //     });
        //     let mut map = doc1.get_or_create_map("test").unwrap();
        //     std::thread::sleep(std::time::Duration::from_millis(500));
        //     let mut sub_map = doc1.create_map().unwrap();
        //     map.insert("map", sub_map.clone()).unwrap();
        //     std::thread::sleep(std::time::Duration::from_millis(500));
        //     sub_map.insert("sub_map_key", "sub_map_value").unwrap();
        //     std::thread::sleep(std::time::Duration::from_millis(500));
        // }
        loom_model!({
            let mut doc = Doc::default();
            doc
                .apply_update_from_binary_v1(vec![
                    1, 1, 1, 1, 40, 0, 1, 0, 11, 115, 117, 98, 95, 109, 97, 112, 95, 107, 101, 121, 1, 119,
                    13, 115, 117, 98, 95, 109, 97, 112, 95, 118, 97, 108, 117, 101, 0,
                ])
                .unwrap();
            let pending_size = doc
                .store
                .read()
                .unwrap()
                .pending
                .as_ref()
                .unwrap()
                .structs
                .iter()
                .map(|s| s.1.len())
                .sum::<usize>();
            doc
                .apply_update_from_binary_v1(vec![
                    1, 1, 1, 1, 40, 0, 1, 0, 11, 115, 117, 98, 95, 109, 97, 112, 95, 107, 101, 121, 1, 119,
                    13, 115, 117, 98, 95, 109, 97, 112, 95, 118, 97, 108, 117, 101, 0,
                ])
                .unwrap();
            // pending nodes should not grow up after apply same pending update
            assert_eq!(
                pending_size,
                doc
                    .store
                    .read()
                    .unwrap()
                    .pending
                    .as_ref()
                    .unwrap()
                    .structs
                    .iter()
                    .map(|s| s.1.len())
                    .sum::<usize>()
            );
        });
    }
    #[test]
    fn test_update_from_vec_ref() {
        loom_model!({
            let doc = Doc::new();
            let mut text = doc.get_or_create_text("text").unwrap();
            text.insert(0, "hello world").unwrap();
            let update = doc.encode_update_v1().unwrap();
            let doc = Doc::try_from_binary_v1(update).unwrap();
            let text = doc.get_or_create_text("text").unwrap();
            assert_eq!(&text.to_string(), "hello world");
        });
    }
    #[test]
    #[cfg_attr(any(miri, loom), ignore)]
    fn test_apply_update() {
        // regression fixtures captured from real documents
        let updates = [
            include_bytes!("../fixtures/basic.bin").to_vec(),
            include_bytes!("../fixtures/database.bin").to_vec(),
            include_bytes!("../fixtures/large.bin").to_vec(),
            include_bytes!("../fixtures/with-subdoc.bin").to_vec(),
            include_bytes!("../fixtures/edge-case-left-right-same-node.bin").to_vec(),
        ];
        for update in updates {
            let mut doc = Doc::new();
            doc.apply_update_from_binary_v1(&update).unwrap();
        }
    }
}

View File

@@ -0,0 +1,35 @@
use std::{
collections::HashMap,
hash::{BuildHasher, Hasher},
};
use super::Client;
/// Identity hasher for client ids: stores the written `u64` and returns it
/// verbatim as the hash.
#[derive(Default)]
pub struct ClientHasher(Client);
impl Hasher for ClientHasher {
    /// The last written client id *is* the hash.
    fn finish(&self) -> u64 {
        self.0
    }
    // Byte-slice input is intentionally ignored: this hasher only supports
    // `u64` keys, which `HashMap` delivers via `write_u64` below. Do not use
    // it with any other key type.
    fn write(&mut self, _: &[u8]) {}
    fn write_u64(&mut self, i: u64) {
        self.0 = i
    }
}
/// Zero-sized [`BuildHasher`] producing [`ClientHasher`] instances.
#[derive(Default, Clone)]
pub struct ClientHasherBuilder;
impl BuildHasher for ClientHasherBuilder {
    type Hasher = ClientHasher;
    fn build_hasher(&self) -> Self::Hasher {
        ClientHasher::default()
    }
}
// use ClientID as key
// Client ids are effectively random u64s already, so the identity "hash"
// above is cheaper than SipHash while keeping a reasonable distribution.
pub type ClientMap<V> = HashMap<Client, V, ClientHasherBuilder>;

View File

@@ -0,0 +1,327 @@
use std::{collections::VecDeque, sync::Arc};
use serde::{Deserialize, Serialize};
use super::{store::StoreRef, *};
use crate::sync::RwLock;
/// Resolved parent of an item while walking paths: a named root type, an
/// intermediate item node, or unresolvable.
enum ParentNode {
    Root(String),
    Node(Somr<Item>),
    Unknown,
}
/// Filtering options for [`StoreHistory::parse_store`].
#[derive(Clone, Default)]
pub struct HistoryOptions {
    // restrict history to a single client; `Some(0)` is treated as unset
    pub client: Option<u64>,
    /// Only available when client is set
    pub skip: Option<usize>,
    /// Only available when client is set
    pub limit: Option<usize>,
}
/// Derives human-readable [`History`] entries from a doc store, maintaining
/// an `item id -> item` index (`parents`) used to resolve hierarchy paths.
#[derive(Debug, Clone, Default)]
pub struct StoreHistory {
    store: StoreRef,
    parents: Arc<RwLock<HashMap<Id, Somr<Item>>>>,
}
impl StoreHistory {
    pub(crate) fn new(store: &StoreRef) -> Self {
        Self {
            store: store.clone(),
            ..Default::default()
        }
    }
    /// Rebuild the parent index from the current store contents.
    pub fn resolve(&self) {
        let store = self.store.read().unwrap();
        self.resolve_with_store(&store);
    }
    /// Rebuild the parent index from an already-locked store (used by the
    /// publisher thread while it holds the store read lock).
    pub(crate) fn resolve_with_store(&self, store: &DocStore) {
        let mut parents = self.parents.write().unwrap();
        for node in store.items.values().flat_map(|items| items.iter()) {
            let node = node.as_item();
            if let Some(item) = node.get() {
                parents
                    .entry(item.id)
                    .and_modify(|e| {
                        // refresh stale handles pointing at replaced nodes
                        if *e != node {
                            *e = node.clone();
                        }
                    })
                    .or_insert(node.clone());
            }
        }
    }
    /// Turn all live items of an [`Update`] into history entries, ordered by
    /// clock.
    pub fn parse_update(&self, update: &Update) -> Vec<History> {
        let store_items = SortedNodes::new(update.structs.iter().collect::<Vec<_>>())
            .filter_map(|n| n.as_item().get().cloned())
            .collect::<Vec<_>>();
        // make items as reference
        let mut store_items = store_items.iter().collect::<Vec<_>>();
        store_items.sort_by(|a, b| a.id.clock.cmp(&b.id.clock));
        self.parse_items(store_items)
    }
    /// History entries (as deletions) for items newly deleted between
    /// `old_sets` and `new_sets`.
    pub fn parse_delete_sets(
        &self,
        old_sets: &ClientMap<OrderRange>,
        new_sets: &ClientMap<OrderRange>,
    ) -> Vec<History> {
        let store = self.store.read().unwrap();
        let deleted_items = new_sets
            .iter()
            .filter_map(|(id, new_range)| {
                // diff range if old range exists, or use new range
                let range = old_sets
                    .get(id)
                    .map(|r| r.diff_range(new_range).into())
                    .unwrap_or(new_range.clone());
                (!range.is_empty()).then_some((id, range))
            })
            .filter_map(|(client, range)| {
                // check items contains in deleted range
                store.items.get(client).map(move |items| {
                    items
                        .iter()
                        .filter(move |i| range.contains(i.clock()))
                        .filter_map(|i| i.as_item().get().cloned())
                })
            })
            .flatten()
            .collect();
        self.parse_deleted_items(deleted_items)
    }
    /// History entries for the whole store, optionally restricted by
    /// [`HistoryOptions`] (client filter plus skip/limit).
    pub fn parse_store(&self, options: HistoryOptions) -> Vec<History> {
        let store_items = {
            // client id 0 means "no filter"
            let client = options
                .client
                .as_ref()
                .and_then(|client| client.ne(&0).then_some(client));
            let store = self.store.read().unwrap();
            let mut sort_iter: Box<dyn Iterator<Item = Item>> = Box::new(
                SortedNodes::new(if let Some(client) = client {
                    store
                        .items
                        .get(client)
                        .map(|i| vec![(client, i)])
                        .unwrap_or_default()
                } else {
                    store.items.iter().collect::<Vec<_>>()
                })
                .filter_map(|n| n.as_item().get().cloned()),
            );
            if client.is_some() {
                // skip and limit only available when client is set
                if let Some(skip) = options.skip {
                    sort_iter = Box::new(sort_iter.skip(skip));
                }
                if let Some(limit) = options.limit {
                    sort_iter = Box::new(sort_iter.take(limit));
                }
            }
            sort_iter.collect::<Vec<_>>()
        };
        // make items as reference
        let mut store_items = store_items.iter().collect::<Vec<_>>();
        store_items.sort_by(|a, b| a.id.clock.cmp(&b.id.clock));
        self.parse_items(store_items)
    }
    // Convert live (non-deleted) items into `Update` actions with resolved
    // paths.
    fn parse_items(&self, store_items: Vec<&Item>) -> Vec<History> {
        let parents = self.parents.read().unwrap();
        let mut histories = vec![];
        for item in store_items {
            if item.deleted() {
                continue;
            }
            histories.push(History {
                id: item.id.to_string(),
                parent: Self::parse_path(item, &parents),
                content: Value::from(&item.content).to_string(),
                action: HistoryAction::Update,
            })
        }
        histories
    }
    // Convert deleted items into `Delete` actions with resolved paths.
    fn parse_deleted_items(&self, deleted_items: Vec<Item>) -> Vec<History> {
        let parents = self.parents.read().unwrap();
        let mut histories = vec![];
        for item in deleted_items {
            histories.push(History {
                id: item.id.to_string(),
                parent: Self::parse_path(&item, &parents),
                content: Value::from(&item.content).to_string(),
                action: HistoryAction::Delete,
            })
        }
        histories
    }
    // Walk from `item` up to its root type, collecting the name/index of each
    // level; the result is root-first.
    fn parse_path(item: &Item, parents: &HashMap<Id, Somr<Item>>) -> Vec<String> {
        let mut path = Vec::new();
        let mut cur = item.clone();
        while let Some(node) = cur.find_node_with_parent_info() {
            path.push(Self::get_node_name(&node));
            match Self::get_parent(parents, &node.parent) {
                ParentNode::Root(name) => {
                    path.push(name);
                    break;
                }
                ParentNode::Node(parent) => {
                    if let Some(parent) = parent.get() {
                        cur = parent.clone();
                    } else {
                        // parent handle went dangling; stop with a partial path
                        break;
                    }
                }
                ParentNode::Unknown => {
                    break;
                }
            }
        }
        path.reverse();
        path
    }
    // A map child is named by its key; a list child by its index, computed by
    // counting left siblings.
    fn get_node_name(item: &Item) -> String {
        if let Some(name) = item.parent_sub.clone() {
            name.to_string()
        } else {
            let mut curr = item.clone();
            let mut idx = 0;
            while let Some(item) = curr.left.get() {
                curr = item.clone();
                idx += 1;
            }
            idx.to_string()
        }
    }
    // Resolve an item's recorded parent reference against the parent index.
    fn get_parent(parents: &HashMap<Id, Somr<Item>>, parent: &Option<Parent>) -> ParentNode {
        match parent {
            None => ParentNode::Unknown,
            Some(Parent::Type(ptr)) => ptr
                .ty()
                .and_then(|ty| {
                    ty.item
                        .get()
                        .and_then(|i| parents.get(&i.id).map(|p| ParentNode::Node(p.clone())))
                        .or(ty.root_name.clone().map(ParentNode::Root))
                })
                .unwrap_or(ParentNode::Unknown),
            Some(Parent::String(name)) => ParentNode::Root(name.to_string()),
            Some(Parent::Id(id)) => parents
                .get(id)
                .map(|p| ParentNode::Node(p.clone()))
                .unwrap_or(ParentNode::Unknown),
        }
    }
}
/// Kind of change a [`History`] entry records.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub enum HistoryAction {
    Insert,
    Update,
    Delete,
}
/// One human-readable change record derived from the store.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct History {
    // stringified item id ("client:clock" form via `Id::to_string`)
    pub id: String,
    // path from root type down to the changed node
    pub parent: Vec<String>,
    // stringified content value
    pub content: String,
    pub action: HistoryAction,
}
/// Iterator over per-client node queues, yielding clients in ascending id
/// order and each client's nodes from the back of its deque.
pub(crate) struct SortedNodes<'a> {
    nodes: Vec<(&'a Client, &'a VecDeque<Node>)>,
    current: Option<VecDeque<Node>>,
}
impl<'a> SortedNodes<'a> {
    pub fn new(mut nodes: Vec<(&'a Client, &'a VecDeque<Node>)>) -> Self {
        // sort descending so `pop()` yields the smallest client id first
        nodes.sort_by(|a, b| b.0.cmp(a.0));
        let current = nodes.pop().map(|(_, v)| v.clone());
        Self { nodes, current }
    }
}
impl Iterator for SortedNodes<'_> {
    type Item = Node;
    /// Drain the current client's queue from the back, then advance to the
    /// next client until every queue is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(queue) = self.current.as_mut() {
                if let Some(node) = queue.pop_back() {
                    return Some(node);
                }
            }
            // current queue exhausted (or never set): move to the next client
            let (_, nodes) = self.nodes.pop()?;
            self.current = Some(nodes.clone());
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    #[test]
    fn parse_history_client_test() {
        loom_model!({
            let doc = Doc::default();
            let mut map = doc.get_or_create_map("map").unwrap();
            let mut sub_map = doc.create_map().unwrap();
            map.insert("sub_map".to_string(), sub_map.clone()).unwrap();
            sub_map.insert("key".to_string(), "value").unwrap();
            assert_eq!(doc.clients()[0], doc.client());
        });
    }
    #[test]
    fn parse_history_test() {
        loom_model!({
            let doc = Doc::default();
            let mut map = doc.get_or_create_map("map").unwrap();
            let mut sub_map = doc.create_map().unwrap();
            map.insert("sub_map".to_string(), sub_map.clone()).unwrap();
            sub_map.insert("key".to_string(), "value").unwrap();
            let history = StoreHistory::new(&doc.store);
            let update = doc.encode_update().unwrap();
            // a full-store parse must match parsing the full-doc update
            assert_eq!(
                history.parse_store(Default::default()),
                history.parse_update(&update,)
            );
        });
    }
}

View File

@@ -0,0 +1,33 @@
mod awareness;
mod codec;
mod common;
mod document;
mod hasher;
mod history;
mod publisher;
mod store;
mod types;
mod utils;
pub use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
pub use awareness::{Awareness, AwarenessEvent};
pub use codec::*;
pub use common::*;
pub use document::{Doc, DocOptions};
pub use hasher::ClientMap;
pub use history::{History, HistoryOptions, StoreHistory};
use smol_str::SmolStr;
pub(crate) use store::DocStore;
pub use types::*;
pub use utils::*;
use super::*;
/// NOTE:
/// - We do not use [HashMap::with_capacity(num_of_clients)] directly here
///   because we don't trust the input data.
/// - For instance, what if the first u64 was somehow set a very big value?
/// - A pre-allocated HashMap with a big capacity may cause OOM.
/// - A kinda safer approach is give it a max capacity of 1024 at first
///   allocation, and then let std makes the growth as need.
pub const HASHMAP_SAFE_CAPACITY: usize = 1 << 10;

View File

@@ -0,0 +1,244 @@
use std::{
thread::{current, sleep, spawn},
time::Duration,
};
use log::{debug, trace};
use super::{history::StoreHistory, store::StoreRef, *};
use crate::sync::{Arc, AtomicBool, Mutex, Ordering, RwLock};
/// Change callback: receives the encoded update binary and the parsed
/// [`History`] entries for that change.
pub type DocSubscriber = Box<dyn Fn(&[u8], &[History]) + Sync + Send + 'static>;
/// Polling interval of the observer thread, in milliseconds.
const OBSERVE_INTERVAL: u64 = 100;
/// Watches a doc store on a background thread and notifies subscribers when
/// the state vector or delete set changes.
pub struct DocPublisher {
    store: StoreRef,
    history: StoreHistory,
    subscribers: Arc<RwLock<Vec<DocSubscriber>>>,
    // join handle of the observer thread, if started
    observer: Arc<Mutex<Option<std::thread::JoinHandle<()>>>>,
    // flag the observer thread polls to know when to stop
    observing: Arc<AtomicBool>,
}
impl DocPublisher {
    /// Creates a publisher bound to `store` and resolves the current history
    /// snapshot. The observer thread starts immediately unless building for
    /// bench/fuzzing/loom/miri, where background threads would interfere.
    pub(crate) fn new(store: StoreRef) -> Self {
        let subscribers = Arc::new(RwLock::new(Vec::<DocSubscriber>::new()));
        let history = StoreHistory::new(&store);
        history.resolve();
        let publisher = Self {
            store,
            history,
            subscribers,
            observer: Arc::default(),
            observing: Arc::new(AtomicBool::new(false)),
        };
        if cfg!(not(any(feature = "bench", fuzzing, loom, miri))) {
            publisher.start();
        }
        publisher
    }

    /// Spawns the observer thread if it is not already running.
    ///
    /// The thread wakes every [OBSERVE_INTERVAL] ms, compares the store's
    /// state vector and delete set with the last observed snapshot, and when
    /// they differ (and there is at least one subscriber) encodes the
    /// incremental diff and invokes every subscriber with it.
    pub fn start(&self) {
        let mut observer = self.observer.lock().unwrap();
        let observing = self.observing.clone();
        let store = self.store.clone();
        let history = self.history.clone();
        if observer.is_none() {
            let thread_subscribers = self.subscribers.clone();
            observing.store(true, Ordering::Release);
            debug!("start observing");
            let thread = spawn(move || {
                // Snapshot of the last published state.
                let mut last_update = store.read().unwrap().get_state_vector();
                let mut last_deletes = store.read().unwrap().delete_set.clone();
                loop {
                    sleep(Duration::from_millis(OBSERVE_INTERVAL));
                    if !observing.load(Ordering::Acquire) {
                        debug!("stop observing");
                        break;
                    }
                    let subscribers = thread_subscribers.read().unwrap();
                    // Nothing to publish to; skip the (relatively costly)
                    // state comparison entirely.
                    if subscribers.is_empty() {
                        continue;
                    }
                    let store = store.read().unwrap();
                    let update = store.get_state_vector();
                    let deletes = store.delete_set.clone();
                    if update != last_update || deletes != last_deletes {
                        trace!(
                            "update: {:?}, last_update: {:?}, {:?}",
                            update,
                            last_update,
                            current().id(),
                        );
                        trace!(
                            "deletes: {:?}, last_deletes: {:?}, {:?}",
                            deletes,
                            last_deletes,
                            current().id(),
                        );
                        history.resolve_with_store(&store);
                        // `history` is shadowed below by the parsed entries
                        // for this diff.
                        let (binary, history) = match store.diff_state_vector(&last_update, false) {
                            Ok(update) => {
                                // Release the store lock before the
                                // potentially slow parse/encode work.
                                drop(store);
                                let history = history
                                    .parse_update(&update)
                                    .into_iter()
                                    .chain(history.parse_delete_sets(&last_deletes, &deletes))
                                    .collect::<Vec<_>>();
                                let mut encoder = RawEncoder::default();
                                if let Err(e) = update.write(&mut encoder) {
                                    warn!("Failed to encode document: {}", e);
                                    continue;
                                }
                                (encoder.into_inner(), history)
                            }
                            Err(e) => {
                                warn!("Failed to diff document: {}", e);
                                continue;
                            }
                        };
                        last_update = update;
                        last_deletes = deletes;
                        for cb in subscribers.iter() {
                            use std::panic::{catch_unwind, AssertUnwindSafe};
                            // catch panic if callback throw
                            catch_unwind(AssertUnwindSafe(|| {
                                cb(&binary, &history);
                            }))
                            .unwrap_or_else(|e| {
                                warn!("Failed to call subscriber: {:?}", e);
                            });
                        }
                    } else {
                        drop(store);
                    }
                }
            });
            observer.replace(thread);
        } else {
            debug!("already observing");
        }
    }

    /// Signals the observer thread to exit and joins it. No-op when the
    /// publisher is not observing.
    pub fn stop(&self) {
        let mut observer = self.observer.lock().unwrap();
        if let Some(observer) = observer.take() {
            self.observing.store(false, Ordering::Release);
            observer.join().unwrap();
        }
    }

    /// Number of registered subscribers.
    pub(crate) fn count(&self) -> usize {
        self.subscribers.read().unwrap().len()
    }

    /// Registers a subscriber callback; it will be invoked on every publish
    /// cycle from the observer thread.
    pub(crate) fn subscribe(&self, subscriber: impl Fn(&[u8], &[History]) + Send + Sync + 'static) {
        self.subscribers.write().unwrap().push(Box::new(subscriber));
    }

    /// Removes all subscribers.
    pub(crate) fn unsubscribe_all(&self) {
        self.subscribers.write().unwrap().clear();
    }
}
impl std::fmt::Debug for DocPublisher {
    /// Opaque debug representation — subscriber callbacks and thread handles
    /// have nothing useful to print.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "DocPublisher")
    }
}
impl Drop for DocPublisher {
    fn drop(&mut self) {
        // Join the observer thread first so no callback can fire while the
        // subscriber list is being cleared.
        self.stop();
        self.unsubscribe_all();
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sync::AtomicUsize;

    /// End-to-end check that the observer thread publishes incremental
    /// updates together with parsed history entries, one cycle per batch of
    /// edits separated by sleeps longer than the observe interval.
    #[test]
    fn test_parse_update_history() {
        loom_model!({
            let doc = Doc::default();
            // Expected (id, parent path, content) triples for each of the
            // three publish cycles triggered below.
            let ret = [
                vec![vec!["(1, 0)", "test.key1", "val1"]],
                vec![
                    vec!["(1, 1)", "test.key2", "val2"],
                    vec!["(1, 2)", "test.key3", "val3"],
                ],
                vec![
                    vec!["(1, 3)", "array.0", "val1"],
                    vec!["(1, 4)", "array.1", "val2"],
                    vec!["(1, 5)", "array.2", "val3"],
                ],
            ];
            let cycle = Arc::new(AtomicUsize::new(0));
            // update: 24
            // history change by (1, 0) at test.key1: val1
            // update: 43
            // history change by (1, 1) at test.key2: val2
            // history change by (1, 2) at test.key3: val3
            // update: 40
            // history change by (1, 3) at array.0: val1
            // history change by (1, 4) at array.1: val2
            // history change by (1, 5) at array.2: val3
            doc.subscribe(move |u, history| {
                println!("update: {}", u.len());
                let cycle = cycle.fetch_add(1, Ordering::SeqCst);
                let ret = ret[cycle].clone();
                for (i, h) in history.iter().enumerate() {
                    println!(
                        "history change by {} at {}: {}",
                        h.id,
                        h.parent.join("."),
                        h.content
                    );
                    // lost first update by unknown reason in asan test, skip it if asan enabled
                    if option_env!("ASAN_OPTIONS").is_none() {
                        let ret = &ret[i];
                        assert_eq!(h.id, ret[0]);
                        assert_eq!(h.parent.join("."), ret[1]);
                        assert_eq!(h.content, ret[2]);
                    }
                }
            });
            // Each 500ms pause spans several 100ms observer wake-ups, making
            // every batch below publish as a separate cycle.
            sleep(Duration::from_millis(500));
            let mut map = doc.get_or_create_map("test").unwrap();
            map.insert("key1".to_string(), "val1").unwrap();
            sleep(Duration::from_millis(500));
            map.insert("key2".to_string(), "val2").unwrap();
            map.insert("key3".to_string(), "val3").unwrap();
            sleep(Duration::from_millis(500));
            let mut array = doc.get_or_create_array("array").unwrap();
            array.push("val1").unwrap();
            array.push("val2").unwrap();
            array.push("val3").unwrap();
            sleep(Duration::from_millis(500));
            doc.publisher.stop();
        });
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,216 @@
use super::*;
// Generates the `Array` wrapper type over `YTypeRef` (see `impl_type!`).
impl_type!(Array);

// `Array` inherits its list behavior from the shared `ListType` trait.
impl ListType for Array {}
/// Iterator yielding the visible values of an [Array], skipping entries that
/// are not countable or whose item has been released.
pub struct ArrayIter<'a>(ListIterator<'a>);

impl Iterator for ArrayIter<'_> {
    type Item = Value;

    fn next(&mut self) -> Option<Self::Item> {
        // Advance the underlying item iterator until a live, countable item
        // is found, then surface its content as a `Value`.
        self.0.by_ref().find_map(|ptr| {
            ptr.get()
                .filter(|item| item.countable())
                .map(|item| Value::from(&item.content))
        })
    }
}
impl Array {
    /// Number of visible elements in the array.
    #[inline]
    pub fn len(&self) -> u64 {
        self.content_len()
    }

    /// Returns `true` when the array holds no visible elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the value stored at `index`, or `None` when the index is out
    /// of bounds or the backing item has been released.
    pub fn get(&self, index: u64) -> Option<Value> {
        let (item, offset) = self.get_item_at(index)?;
        let item = item.get()?;
        // TODO: rewrite to content.read(&mut [Any])
        match &item.content {
            Content::Any(any) => any.get(offset as usize).map(|any| Value::Any(any.clone())),
            content => Some(Value::from(content)),
        }
    }

    /// Iterates over the visible values of the array.
    pub fn iter(&self) -> ArrayIter {
        ArrayIter(self.iter_item())
    }

    /// Appends `val` at the end of the array.
    pub fn push<V: Into<Value>>(&mut self, val: V) -> JwstCodecResult {
        let end = self.len();
        self.insert(end, val)
    }

    /// Inserts `val` before position `idx`.
    pub fn insert<V: Into<Value>>(&mut self, idx: u64, val: V) -> JwstCodecResult {
        self.insert_at(idx, val.into().into())
    }

    /// Removes `len` elements starting at `idx`.
    pub fn remove(&mut self, idx: u64, len: u64) -> JwstCodecResult {
        self.remove_at(idx, len)
    }
}
impl serde::Serialize for Array {
    /// Serializes the array as a plain sequence of its visible values.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;
        let mut seq = serializer.serialize_seq(Some(self.len() as usize))?;
        self.iter()
            .try_for_each(|value| seq.serialize_element(&value))?;
        seq.end()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inserting at arbitrary positions keeps element order.
    #[test]
    fn test_yarray_insert() {
        let options = DocOptions::default();
        loom_model!({
            let doc = Doc::with_options(options.clone());
            let mut array = doc.get_or_create_array("abc").unwrap();
            array.insert(0, " ").unwrap();
            array.insert(0, "Hello").unwrap();
            array.insert(2, "World").unwrap();
            assert_eq!(
                array.get(0).unwrap(),
                Value::Any(Any::String("Hello".into()))
            );
            assert_eq!(array.get(1).unwrap(), Value::Any(Any::String(" ".into())));
            assert_eq!(
                array.get(2).unwrap(),
                Value::Any(Any::String("World".into()))
            );
        });
    }

    /// Cross-checks text updates produced by the yrs reference
    /// implementation against this crate when decoded as an array view.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_ytext_equal() {
        use yrs::{Options, Text, Transact};
        let options = DocOptions::default();
        let yrs_options = Options::default();
        loom_model!({
            let doc = yrs::Doc::with_options(yrs_options.clone());
            let array = doc.get_or_insert_text("abc");
            let mut trx = doc.transact_mut();
            array.insert(&mut trx, 0, " ");
            array.insert(&mut trx, 0, "Hello");
            array.insert(&mut trx, 6, "World");
            array.insert(&mut trx, 11, "!");
            let buffer = trx.encode_update_v1();
            let mut decoder = RawDecoder::new(&buffer);
            let update = Update::read(&mut decoder).unwrap();
            let mut doc = Doc::with_options(options.clone());
            doc.apply_update(update).unwrap();
            let array = doc.get_or_create_array("abc").unwrap();
            assert_eq!(
                array.get(0).unwrap(),
                Value::Any(Any::String("Hello".into()))
            );
            assert_eq!(array.get(5).unwrap(), Value::Any(Any::String(" ".into())));
            assert_eq!(
                array.get(6).unwrap(),
                Value::Any(Any::String("World".into()))
            );
            assert_eq!(array.get(11).unwrap(), Value::Any(Any::String("!".into())));
        });
        // Same round-trip with the edits applied in a different order.
        let options = DocOptions::default();
        let yrs_options = Options::default();
        loom_model!({
            let doc = yrs::Doc::with_options(yrs_options.clone());
            let array = doc.get_or_insert_text("abc");
            let mut trx = doc.transact_mut();
            array.insert(&mut trx, 0, "Hello");
            array.insert(&mut trx, 5, " ");
            array.insert(&mut trx, 6, "World");
            array.insert(&mut trx, 11, "!");
            let buffer = trx.encode_update_v1();
            let mut decoder = RawDecoder::new(&buffer);
            let update = Update::read(&mut decoder).unwrap();
            let mut doc = Doc::with_options(options.clone());
            doc.apply_update(update).unwrap();
            let array = doc.get_or_create_array("abc").unwrap();
            assert_eq!(
                array.get(0).unwrap(),
                Value::Any(Any::String("Hello".into()))
            );
            assert_eq!(array.get(5).unwrap(), Value::Any(Any::String(" ".into())));
            assert_eq!(
                array.get(6).unwrap(),
                Value::Any(Any::String("World".into()))
            );
            assert_eq!(array.get(11).unwrap(), Value::Any(Any::String("!".into())));
        });
    }

    /// Decodes a yrs-produced array update and checks element positions.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_yrs_array_decode() {
        use yrs::{Array, Transact};
        loom_model!({
            let update = {
                let doc = yrs::Doc::new();
                let array = doc.get_or_insert_array("abc");
                let mut trx = doc.transact_mut();
                array.insert(&mut trx, 0, "hello");
                array.insert(&mut trx, 1, "world");
                array.insert(&mut trx, 1, " ");
                trx.encode_update_v1()
            };
            let doc = Doc::try_from_binary_v1_with_options(
                update.clone(),
                DocOptions {
                    guid: String::from("1"),
                    client_id: 1,
                    gc: true,
                },
            )
            .unwrap();
            let arr = doc.get_or_create_array("abc").unwrap();
            assert_eq!(
                arr.get(2).unwrap(),
                Value::Any(Any::String("world".to_string()))
            )
        });
    }
}

View File

@@ -0,0 +1,23 @@
use super::*;
/// Iterator over the linked item list of a [YType], skipping deleted items.
pub(crate) struct ListIterator<'a> {
    /// Read guard keeping the type (and therefore its item list) stable for
    /// the lifetime of the iterator.
    pub(super) _lock: RwLockReadGuard<'a, YType>,
    /// Next candidate item to yield.
    pub(super) cur: Somr<Item>,
}

impl Iterator for ListIterator<'_> {
    type Item = Somr<Item>;

    fn next(&mut self) -> Option<Self::Item> {
        // Advance `cur` to its right sibling on every step; yield the
        // previous pointer unless the item is marked deleted.
        while let Some(item) = self.cur.clone().get() {
            let cur = std::mem::replace(&mut self.cur, item.right.clone());
            if item.deleted() {
                continue;
            }
            return Some(cur);
        }
        None
    }
}

View File

@@ -0,0 +1,241 @@
mod iterator;
mod search_marker;
pub(crate) use iterator::ListIterator;
pub(crate) use search_marker::MarkerList;
use super::*;
/// A resolved cursor position inside a list-like [YType].
pub(crate) struct ItemPosition {
    /// The type the cursor points into.
    pub parent: YTypeRef,
    /// Item directly left of the cursor (none at the list head).
    pub left: ItemRef,
    /// Item directly right of the cursor (none at the list tail).
    pub right: ItemRef,
    /// Logical index of the cursor, counted in visible content length.
    pub index: u64,
    /// Offset into the item when the cursor falls inside a splittable item.
    pub offset: u64,
}
impl ItemPosition {
    /// Advances the cursor one item to the right, adding the visible length
    /// of the item being passed to `index`.
    pub fn forward(&mut self) {
        if let Some(right) = self.right.get() {
            if !right.deleted() {
                self.index += right.len();
            }
            self.left = self.right.clone();
            self.right = right.right.clone();
        } else {
            // FAIL
            // NOTE(review): forwarding past the tail is silently ignored —
            // confirm callers never depend on an error here.
        }
    }

    /// we found a position cursor point in between a splittable item,
    /// we need to split the item by the offset.
    ///
    /// before:
    /// ---------------------------------
    ///   ^left                  ^right
    ///        ^offset
    /// after:
    /// ---------------------------------
    ///        ^left ^right
    pub fn normalize(&mut self, store: &mut DocStore) -> JwstCodecResult {
        if self.offset > 0 {
            debug_assert!(self.left.is_some());
            if let Some(left) = self.left.get() {
                // Split `left` at `offset`; the cursor then sits exactly on
                // the new item boundary with offset reset to zero.
                let (left, right) = store.split_node(left.id, self.offset)?;
                self.left = left.as_item();
                self.right = right.as_item();
                self.index += self.offset;
                self.offset = 0;
            }
        }
        Ok(())
    }
}
/// Shared list behavior (indexing, insert, remove) for array/text-like types.
pub(crate) trait ListType: AsInner<Inner = YTypeRef> {
    /// Total visible content length of the list.
    #[inline(always)]
    fn content_len(&self) -> u64 {
        self.as_inner().ty().unwrap().len
    }

    /// Iterates over the raw items, holding the type's read lock for the
    /// whole iteration.
    fn iter_item(&self) -> ListIterator {
        let inner = self.as_inner().ty().unwrap();
        ListIterator {
            cur: inner.start.clone(),
            _lock: inner,
        }
    }

    /// Resolves `index` into an [ItemPosition] cursor. The search is seeded
    /// from the cached search markers when available, then walks right item
    /// by item. Returns `None` when `index` lies beyond the end of the list.
    fn find_pos(&self, inner: &YType, index: u64) -> Option<ItemPosition> {
        let mut remaining = index;
        let start = inner.start.clone();
        let mut pos = ItemPosition {
            parent: self.as_inner().clone(),
            left: Somr::none(),
            right: start,
            index: 0,
            offset: 0,
        };
        // Empty list: the head position is the only valid cursor.
        if pos.right.is_none() {
            return Some(pos);
        }
        // Jump close to the target using the cached search markers.
        if let Some(markers) = &inner.markers {
            if let Some(marker) = markers.find_marker(inner, index) {
                if marker.index > remaining {
                    remaining = 0
                } else {
                    remaining -= marker.index;
                }
                pos.index = marker.index;
                pos.left = marker
                    .ptr
                    .get()
                    .map(|ptr| ptr.left.clone())
                    .unwrap_or_default();
                pos.right = marker.ptr;
            }
        };
        // Walk right, counting only non-deleted content; `offset` is set
        // when the target index falls inside an item.
        while remaining > 0 {
            if let Some(item) = pos.right.get() {
                if !item.deleted() {
                    let content_len = item.len();
                    if remaining < content_len {
                        pos.offset = remaining;
                        remaining = 0;
                    } else {
                        pos.index += content_len;
                        remaining -= content_len;
                    }
                }
                pos.left = pos.right.clone();
                pos.right = item.right.clone();
            } else {
                // Ran off the tail before consuming the full index.
                return None;
            }
        }
        Some(pos)
    }

    /// Inserts `content` before visible position `index`.
    ///
    /// Errors with `IndexOutOfBound` when `index` exceeds the length and
    /// `DocReleased` when the owning store is gone.
    fn insert_at(&mut self, index: u64, content: Content) -> JwstCodecResult {
        if index > self.content_len() {
            return Err(JwstCodecError::IndexOutOfBound(index));
        }
        if let Some((mut store, mut ty)) = self.as_inner().write() {
            if let Some(mut pos) = self.find_pos(&ty, index) {
                // Split the item under the cursor (if any) so insertion
                // happens exactly on an item boundary.
                pos.normalize(&mut store)?;
                Self::insert_after(&mut ty, &mut store, pos, content)?;
            }
        } else {
            return Err(JwstCodecError::DocReleased);
        }
        Ok(())
    }

    /// Creates and integrates a new item carrying `content` at `pos`.
    fn insert_after(
        ty: &mut YType,
        store: &mut DocStore,
        pos: ItemPosition,
        content: Content,
    ) -> JwstCodecResult {
        // Shift cached search markers to account for the inserted length.
        if let Some(markers) = &ty.markers {
            markers.update_marker_changes(pos.index, content.clock_len() as i64);
        }
        let item = store.create_item(
            content,
            pos.left.clone(),
            pos.right.clone(),
            Some(Parent::Type(pos.parent)),
            None,
        );
        store.integrate(Node::Item(item), 0, Some(ty))?;
        Ok(())
    }

    /// Returns the item containing visible position `index`, together with
    /// the offset of `index` inside that item.
    fn get_item_at(&self, index: u64) -> Option<(Somr<Item>, u64)> {
        if index >= self.content_len() {
            return None;
        }
        let ty = self.as_inner().ty().unwrap();
        if let Some(pos) = self.find_pos(&ty, index) {
            if pos.offset == 0 {
                // Cursor sits on an item boundary: the target is `right`.
                return Some((pos.right, 0));
            } else {
                // Cursor sits inside `left`.
                return Some((pos.left, pos.offset));
            }
        }
        None
    }

    /// Removes `len` units of visible content starting at `idx`.
    fn remove_at(&mut self, idx: u64, len: u64) -> JwstCodecResult {
        if len == 0 {
            return Ok(());
        }
        if idx >= self.content_len() {
            return Err(JwstCodecError::IndexOutOfBound(idx));
        }
        if let Some((mut store, mut ty)) = self.as_inner().write() {
            if let Some(pos) = self.find_pos(&ty, idx) {
                Self::remove_after(&mut ty, &mut store, pos, len)?;
            }
        } else {
            return Err(JwstCodecError::DocReleased);
        }
        Ok(())
    }

    /// Marks up to `len` units of content to the right of `pos` as deleted,
    /// splitting the last item when the range ends inside it.
    fn remove_after(
        ty: &mut YType,
        store: &mut DocStore,
        mut pos: ItemPosition,
        len: u64,
    ) -> JwstCodecResult {
        pos.normalize(store)?;
        let mut remaining = len;
        while remaining > 0 {
            if let Some(item) = pos.right.get() {
                if !item.deleted() {
                    let content_len = item.len();
                    if remaining < content_len {
                        // Range ends inside this item: split first so only
                        // the covered part gets deleted.
                        store.split_node(item.id, remaining)?;
                        remaining = 0;
                    } else {
                        remaining -= content_len;
                    }
                    store.delete_item(item, Some(ty));
                }
                pos.forward();
            } else {
                // Reached the tail; stop even if `remaining > 0`.
                break;
            }
        }
        // Shrink cached search markers by what was actually removed.
        if let Some(markers) = &ty.markers {
            markers.update_marker_changes(pos.index, -((len - remaining) as i64));
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,340 @@
use std::{
cell::RefCell,
cmp::max,
collections::VecDeque,
ops::{Deref, DerefMut},
};
use super::*;
/// Upper bound on cached search markers per list (mirrors the yjs limit).
const MAX_SEARCH_MARKER: usize = 80;
/// A cached (item pointer, visible index) pair used to speed up position
/// lookups in long lists.
#[derive(Clone, Debug)]
pub(crate) struct SearchMarker {
    pub ptr: Somr<Item>,
    pub index: u64,
}

impl SearchMarker {
    fn new(ptr: Somr<Item>, index: u64) -> Self {
        SearchMarker { ptr, index }
    }

    /// Re-points an existing marker at a new item/index pair in place.
    fn overwrite_marker(&mut self, ptr: Somr<Item>, index: u64) {
        self.ptr = ptr;
        self.index = index;
    }
}
// SAFETY: `MarkerList` wraps a `RefCell`, which is not `Sync` by itself. As
// described in the NOTE below, a `MarkerList` is only ever reached through
// its owning `YType`, which is always accessed behind an `RwLock` guard, so
// concurrent access to the inner `RefCell` cannot occur in practice.
// NOTE(review): this invariant is enforced by convention, not by the type
// system — confirm every access path really holds the lock.
unsafe impl Sync for MarkerList {}

/// in yjs, a timestamp field is used to sort markers and the oldest marker is
/// deleted once the limit is reached. this was designed for optimization
/// purposes for v8. In Rust, we can simply use a [VecDeque] and trust the
/// compiler to optimize. the [VecDeque] can naturally maintain the insertion
/// order, allowing us to know which marker is the oldest without using an extra
/// timestamp field.
///
/// NOTE:
/// A [MarkerList] always belongs to a [YType],
/// which means whenever a [MarkerList] is used, we already hold the [YType]
/// instance behind an [RwLock] guard, so it is safe to make the list
/// internally mutable.
#[derive(Debug)]
pub(crate) struct MarkerList(RefCell<VecDeque<SearchMarker>>);
// Convenience access to the inner RefCell (borrow/borrow_mut directly on the
// list).
impl Deref for MarkerList {
    type Target = RefCell<VecDeque<SearchMarker>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for MarkerList {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl Default for MarkerList {
    fn default() -> Self {
        Self::new()
    }
}
impl MarkerList {
    /// Creates an empty marker cache.
    pub fn new() -> Self {
        MarkerList(RefCell::new(VecDeque::new()))
    }

    // mark pos and push to the end of the linked list; once the limit is
    // reached the oldest (front) marker is recycled instead of growing.
    fn mark_position(
        list: &mut VecDeque<SearchMarker>,
        ptr: Somr<Item>,
        index: u64,
    ) -> Option<SearchMarker> {
        if list.len() >= MAX_SEARCH_MARKER {
            let mut oldest_marker = list.pop_front().unwrap();
            oldest_marker.overwrite_marker(ptr, index);
            list.push_back(oldest_marker);
        } else {
            let marker = SearchMarker::new(ptr, index);
            list.push_back(marker);
        }
        list.back().cloned()
    }

    // update mark position if the index is within the range of the marker.
    // `len` is the signed visible-length delta of an edit at `index`
    // (positive for inserts, negative for removals).
    pub fn update_marker_changes(&self, index: u64, len: i64) {
        let mut list = self.borrow_mut();
        for marker in list.iter_mut() {
            if len > 0 {
                // On insertions, back the marker up to the nearest indexable
                // item so its cached index stays meaningful.
                while let Some(ptr) = marker.ptr.get() {
                    if !ptr.indexable() {
                        let left_ref = ptr.left.clone();
                        if let Some(left) = left_ref.get() {
                            if left.indexable() {
                                marker.index -= left.len();
                            }
                            marker.ptr = left_ref;
                        } else {
                            // remove marker
                            marker.index = 0;
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }
            // Shift markers sitting at or after the edit position.
            if marker.ptr.is_some() && (index < marker.index || (len > 0 && index == marker.index)) {
                marker.index = max(index as i64, marker.index as i64 + len) as u64;
            }
        }
        // Markers zeroed above are considered dead and dropped here.
        list.retain(|marker| marker.index > 0);
    }

    // find and return the marker that is closest to the index
    pub fn find_marker(&self, parent: &YType, index: u64) -> Option<SearchMarker> {
        if parent.start.is_none() || index == 0 {
            return None;
        }
        let mut list = self.borrow_mut();
        // Start from the cached marker nearest to the requested index, or
        // from the list head when the cache is empty.
        let marker = list
            .iter_mut()
            .min_by_key(|m| (index as i64 - m.index as i64).abs());
        let mut marker_index = marker.as_ref().map(|m| m.index).unwrap_or(0);
        let mut item_ptr = marker
            .as_ref()
            .map(|m| m.ptr.clone())
            .unwrap_or_else(|| parent.start.clone());
        // TODO: this logic here is a bit messy
        // i think it can be implemented with more streamlined code, and then optimized
        {
            // iterate to the right if possible
            while let Some(item) = item_ptr.clone().get() {
                if marker_index >= index {
                    break;
                }
                let right_ref: ItemRef = item.right.clone();
                if right_ref.is_some() {
                    if item.indexable() {
                        if index < marker_index + item.len() {
                            break;
                        }
                        marker_index += item.len();
                    }
                    item_ptr = right_ref;
                } else {
                    break;
                }
            }
            // iterate to the left if necessary (might be that marker_index > index)
            while let Some(item) = item_ptr.clone().get() {
                if marker_index <= index {
                    break;
                }
                let left_ref: ItemRef = item.left.clone();
                if let Some(left) = left_ref.get() {
                    if left.indexable() {
                        marker_index -= left.len();
                    }
                    item_ptr = left_ref;
                } else {
                    break;
                }
            }
            // we want to make sure that item_ptr can't be merged with left, because that
            // would screw up everything in that case just return what we have
            // (it is most likely the best marker anyway) iterate to left until
            // item_ptr can't be merged with left
            while let Some(item) = item_ptr.clone().get() {
                let left_ref: ItemRef = item.left.clone();
                if let Some(left) = left_ref.get() {
                    if left.id.client == item.id.client && left.id.clock + left.len() == item.id.clock {
                        if left.indexable() {
                            marker_index -= left.len();
                        }
                        item_ptr = left_ref;
                        continue;
                    }
                    break;
                } else {
                    break;
                }
            }
        }
        match marker {
            // Reuse the existing marker when the walk stayed close to it
            // (same density heuristic as yjs).
            Some(marker)
                if (marker.index as f64 - marker_index as f64).abs()
                    < parent.len as f64 / MAX_SEARCH_MARKER as f64 =>
            {
                // adjust existing marker
                marker.overwrite_marker(item_ptr, marker_index);
                Some(marker.clone())
            }
            _ => {
                // create new marker
                Self::mark_position(&mut list, item_ptr, marker_index)
            }
        }
    }

    /// Most recently created/recycled marker, if any.
    #[allow(dead_code)]
    pub fn get_last_marker(&self) -> Option<SearchMarker> {
        self.borrow().back().cloned()
    }

    /// Re-points markers from `raw` to `new` (e.g. after an item split),
    /// shifting their cached index by `len_shift`.
    pub fn replace_marker(&self, raw: Somr<Item>, new: Somr<Item>, len_shift: i64) {
        let mut list = self.borrow_mut();
        for marker in list.iter_mut() {
            if marker.ptr == raw {
                marker.ptr = new.clone();
                marker.index = ((marker.index as i64) + len_shift) as u64;
            }
        }
    }
}
#[cfg(test)]
mod tests {
    #[cfg(not(loom))]
    use rand::{Rng, SeedableRng};
    #[cfg(not(loom))]
    use rand_chacha::ChaCha20Rng;
    use yrs::{Array, Options, Transact};

    use super::*;

    /// A fresh marker list should cache the closest item for an index lookup.
    #[test]
    fn test_marker_list() {
        let options = DocOptions::default();
        let yrs_options = Options::default();
        loom_model!({
            // miri cannot run the yrs-based path, so build the equivalent
            // update with the local implementation there.
            let (client_id, buffer) = if cfg!(miri) {
                let doc = Doc::with_options(options.clone());
                let mut array = doc.get_or_create_array("abc").unwrap();
                array.insert(0, " ").unwrap();
                array.insert(0, "Hello").unwrap();
                array.insert(2, "World").unwrap();
                (doc.client(), doc.encode_update_v1().unwrap())
            } else {
                let doc = yrs::Doc::with_options(yrs_options.clone());
                let array = doc.get_or_insert_array("abc");
                let mut trx = doc.transact_mut();
                array.insert(&mut trx, 0, " ");
                array.insert(&mut trx, 0, "Hello");
                array.insert(&mut trx, 2, "World");
                (doc.client_id(), trx.encode_update_v1())
            };
            let mut decoder = RawDecoder::new(&buffer);
            let update = Update::read(&mut decoder).unwrap();
            let mut doc = Doc::with_options(options.clone());
            doc.apply_update(update).unwrap();
            let array = doc.get_or_create_array("abc").unwrap();
            let marker_list = MarkerList::new();
            let marker = marker_list.find_marker(&array.0.ty().unwrap(), 8).unwrap();
            assert_eq!(marker.index, 2);
            assert_eq!(
                marker.ptr,
                doc
                    .store
                    .read()
                    .unwrap()
                    .get_node(Id::new(client_id, 2))
                    .unwrap()
                    .as_item()
            );
        });
    }

    /// Regression: repeated inserts at the front must not corrupt markers.
    #[test]
    fn test_search_marker_flaky() {
        let options = DocOptions::default();
        loom_model!({
            let doc = Doc::with_options(options.clone());
            let mut text = doc.get_or_create_text("test").unwrap();
            text.insert(0, "0").unwrap();
            text.insert(1, "1").unwrap();
            text.insert(0, "0").unwrap();
        });
    }

    /// Inserts at seeded-random positions and verifies the tracked length
    /// stays consistent with the actual content.
    #[cfg(not(loom))]
    fn search_with_seed(seed: u64) {
        let rand = ChaCha20Rng::seed_from_u64(seed);
        let iteration = 20;
        let doc = Doc::with_client(1);
        let mut text = doc.get_or_create_text("test").unwrap();
        text.insert(0, "This is a string with length 32.").unwrap();
        let mut len = text.len();
        for i in 0..iteration {
            let mut rand: ChaCha20Rng = rand.clone();
            let pos = rand.random_range(0..text.len());
            let str = format!("hello {i}");
            len += str.len() as u64;
            text.insert(pos, str).unwrap();
        }
        assert_eq!(text.len(), len);
        assert_eq!(text.to_string().len() as u64, len);
    }

    /// Seeds that previously triggered marker corruption.
    #[test]
    #[cfg(not(loom))]
    fn test_marker_list_with_seed() {
        search_with_seed(785590655803394607);
        search_with_seed(12958877733367615);
        search_with_seed(71776330571528794);
        search_with_seed(2207805473582911);
    }
}

View File

@@ -0,0 +1,326 @@
use std::{collections::hash_map::Iter, rc::Rc};
use super::*;
use crate::{
doc::{AsInner, Node, Parent, YTypeRef},
impl_type, JwstCodecResult,
};
// Generates the `Map` wrapper type over `YTypeRef` (see `impl_type!`).
impl_type!(Map);
/// Shared key/value behavior for map-like types.
pub(crate) trait MapType: AsInner<Inner = YTypeRef> {
    /// Inserts `value` under `key`, superseding the previous item for that
    /// key (the old item becomes the new item's left origin).
    fn _insert<V: Into<Value>>(&mut self, key: String, value: V) -> JwstCodecResult {
        if let Some((mut store, mut ty)) = self.as_inner().write() {
            let left = ty.map.get(&SmolStr::new(&key)).cloned();
            let item = store.create_item(
                value.into().into(),
                left.unwrap_or(Somr::none()),
                Somr::none(),
                Some(Parent::Type(self.as_inner().clone())),
                Some(SmolStr::new(key)),
            );
            store.integrate(Node::Item(item), 0, Some(&mut ty))?;
        }
        Ok(())
    }

    /// Returns the live value stored under `key`, if any.
    fn _get(&self, key: &str) -> Option<Value> {
        self.as_inner().ty().and_then(|ty| {
            ty.map.get(key).and_then(|item| {
                if let Some(item) = item.get() {
                    // Deleted items are tombstones; treat them as absent.
                    if item.deleted() {
                        return None;
                    }
                    Some(Value::from(&item.content))
                } else {
                    None
                }
            })
        })
    }

    /// `true` when `key` maps to a live (non-deleted) item.
    fn _contains_key(&self, key: &str) -> bool {
        if let Some(ty) = self.as_inner().ty() {
            ty.map
                .get(key)
                .and_then(|item| item.get())
                .is_some_and(|item| !item.deleted())
        } else {
            false
        }
    }

    /// Marks the item under `key` as deleted (CRDT tombstone), if present.
    fn _remove(&mut self, key: &str) {
        if let Some((mut store, mut ty)) = self.as_inner().write() {
            if let Some(item) = ty.map.get(key).cloned() {
                if let Some(item) = item.get() {
                    store.delete_item(item, Some(&mut ty));
                }
            }
        }
    }

    /// Number of live entries (counted by iterating, O(n)).
    fn _len(&self) -> u64 {
        self._keys().count() as u64
    }

    /// Builds the internal entries iterator, keeping the type's read guard
    /// alive for the iterator's lifetime.
    fn _iter(&self) -> EntriesInnerIterator {
        let ty = self.as_inner().ty();
        if let Some(ty) = ty {
            let ty = Rc::new(ty);
            // SAFETY: the map iterator borrows from the YType behind the
            // read guard; the guard is stored alongside the iterator (via
            // the Rc in `_lock`), so the borrow cannot outlive the guard
            // while the iterator is in use.
            // NOTE(review): this relies on `_lock` staying alive as long as
            // `iter` within the struct — confirm the drop-order assumption.
            EntriesInnerIterator {
                iter: Some(unsafe { &*Rc::as_ptr(&ty) }.map.iter()),
                _lock: Some(ty),
            }
        } else {
            EntriesInnerIterator {
                _lock: None,
                iter: None,
            }
        }
    }

    /// Iterator over live keys.
    fn _keys(&self) -> KeysIterator {
        KeysIterator(self._iter())
    }

    /// Iterator over live values.
    fn _values(&self) -> ValuesIterator {
        ValuesIterator(self._iter())
    }

    /// Iterator over live (key, value) pairs.
    fn _entries(&self) -> EntriesIterator {
        EntriesIterator(self._iter())
    }
}
/// Internal iterator over live map entries; holds the type's read guard for
/// the duration of iteration so the borrowed entries stay valid.
pub(crate) struct EntriesInnerIterator<'a> {
    _lock: Option<Rc<RwLockReadGuard<'a, YType>>>,
    iter: Option<Iter<'a, SmolStr, ItemRef>>,
}

/// Iterator over the keys of a [Map].
pub struct KeysIterator<'a>(EntriesInnerIterator<'a>);
/// Iterator over the values of a [Map].
pub struct ValuesIterator<'a>(EntriesInnerIterator<'a>);
/// Iterator over the (key, value) pairs of a [Map].
pub struct EntriesIterator<'a>(EntriesInnerIterator<'a>);
impl<'a> Iterator for EntriesInnerIterator<'a> {
    type Item = (&'a str, &'a Item);

    /// Yields the next entry whose item is still alive and not deleted;
    /// an iterator built from a released type yields nothing.
    fn next(&mut self) -> Option<Self::Item> {
        let iter = self.iter.as_mut()?;
        for (key, ptr) in iter {
            if let Some(item) = ptr.get() {
                if !item.deleted() {
                    return Some((key.as_str(), item));
                }
            }
        }
        None
    }
}
// Thin projections over EntriesInnerIterator.
impl<'a> Iterator for KeysIterator<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(k, _)| k)
    }
}

impl Iterator for ValuesIterator<'_> {
    type Item = Value;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(_, v)| Value::from(&v.content))
    }
}

impl<'a> Iterator for EntriesIterator<'a> {
    type Item = (&'a str, Value);

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(k, v)| (k, Value::from(&v.content)))
    }
}
impl MapType for Map {}
impl Map {
    /// Inserts `value` under `key`, replacing any previous value.
    #[inline(always)]
    pub fn insert<V: Into<Value>>(&mut self, key: String, value: V) -> JwstCodecResult {
        self._insert(key, value)
    }

    /// Returns the value stored under `key`, if present and not removed.
    #[inline(always)]
    pub fn get(&self, key: &str) -> Option<Value> {
        self._get(key)
    }

    /// `true` when `key` currently maps to a value.
    #[inline(always)]
    pub fn contains_key(&self, key: &str) -> bool {
        self._contains_key(key)
    }

    /// Removes the value under `key` (no-op when absent).
    #[inline(always)]
    pub fn remove(&mut self, key: &str) {
        self._remove(key)
    }

    /// Number of live entries.
    #[inline(always)]
    pub fn len(&self) -> u64 {
        self._len()
    }

    /// `true` when the map has no live entries.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterates over (key, value) pairs; iteration order is unspecified.
    #[inline(always)]
    pub fn iter(&self) -> EntriesIterator {
        self._entries()
    }

    /// Alias for [Self::iter].
    #[inline(always)]
    pub fn entries(&self) -> EntriesIterator {
        self._entries()
    }

    /// Iterates over the keys.
    #[inline(always)]
    pub fn keys(&self) -> KeysIterator {
        self._keys()
    }

    /// Iterates over the values.
    #[inline(always)]
    pub fn values(&self) -> ValuesIterator {
        self._values()
    }
}
impl serde::Serialize for Map {
    /// Serializes the map as an object of its live entries.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeMap;
        let mut map = serializer.serialize_map(Some(self.len() as usize))?;
        self.iter()
            .try_for_each(|(key, value)| map.serialize_entry(&key, &value))?;
        map.end()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{loom_model, Any, Doc};

    /// insert / get / contains_key / remove round-trip.
    #[test]
    fn test_map_basic() {
        loom_model!({
            let doc = Doc::new();
            let mut map = doc.get_or_create_map("map").unwrap();
            map.insert("1".to_string(), "value").unwrap();
            assert_eq!(
                map.get("1").unwrap(),
                Value::Any(Any::String("value".to_string()))
            );
            assert!(!map.contains_key("nonexistent_key"));
            assert_eq!(map.len(), 1);
            assert!(map.contains_key("1"));
            map.remove("1");
            assert!(!map.contains_key("1"));
            assert_eq!(map.len(), 0);
        });
    }

    /// Map contents survive an encode/decode round-trip.
    #[test]
    fn test_map_equal() {
        loom_model!({
            let doc = Doc::new();
            let mut map = doc.get_or_create_map("map").unwrap();
            map.insert("1".to_string(), "value").unwrap();
            map.insert("2".to_string(), false).unwrap();
            let binary = doc.encode_update_v1().unwrap();
            let new_doc = Doc::try_from_binary_v1(binary).unwrap();
            let map = new_doc.get_or_create_map("map").unwrap();
            assert_eq!(
                map.get("1").unwrap(),
                Value::Any(Any::String("value".to_string()))
            );
            assert_eq!(map.get("2").unwrap(), Value::Any(Any::False));
            assert_eq!(map.len(), 2);
        });
    }

    /// Re-inserting an existing key replaces the value without growing len.
    #[test]
    fn test_map_renew_value() {
        loom_model!({
            let doc = Doc::new();
            let mut map = doc.get_or_create_map("map").unwrap();
            map.insert("1".to_string(), "value").unwrap();
            map.insert("1".to_string(), "value2").unwrap();
            assert_eq!(
                map.get("1").unwrap(),
                Value::Any(Any::String("value2".to_string()))
            );
            assert_eq!(map.len(), 1);
        });
    }

    /// Contents decoded from a binary produced by another doc instance.
    #[test]
    fn test_map_re_encode() {
        loom_model!({
            let binary = {
                let doc = Doc::new();
                let mut map = doc.get_or_create_map("map").unwrap();
                map.insert("1".to_string(), "value1").unwrap();
                map.insert("2".to_string(), "value2").unwrap();
                doc.encode_update_v1().unwrap()
            };
            {
                let doc = Doc::try_from_binary_v1(binary).unwrap();
                let map = doc.get_or_create_map("map").unwrap();
                assert_eq!(
                    map.get("1").unwrap(),
                    Value::Any(Any::String("value1".to_string()))
                );
                assert_eq!(
                    map.get("2").unwrap(),
                    Value::Any(Any::String("value2".to_string()))
                );
            }
        });
    }

    /// Entries iterator yields all live pairs (order is unspecified).
    #[test]
    fn test_map_iter() {
        loom_model!({
            let doc = Doc::new();
            let mut map = doc.get_or_create_map("map").unwrap();
            map.insert("1".to_string(), "value1").unwrap();
            map.insert("2".to_string(), "value2").unwrap();
            let mut vec = map.entries().collect::<Vec<_>>();
            // hashmap iteration is in random order instead of insert order
            vec.sort_by(|a, b| a.0.cmp(b.0));
            assert_eq!(
                vec,
                vec![
                    ("1", Value::Any(Any::String("value1".to_string()))),
                    ("2", Value::Any(Any::String("value2".to_string())))
                ]
            )
        });
    }
}

View File

@@ -0,0 +1,376 @@
mod array;
mod list;
mod map;
mod text;
mod value;
mod xml;
use std::{collections::hash_map::Entry, sync::Weak};
pub use array::*;
use list::*;
pub use map::*;
pub use text::*;
pub use value::*;
pub use xml::*;
use super::{
store::{StoreRef, WeakStoreRef},
*,
};
use crate::{
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
Item, JwstCodecError, JwstCodecResult,
};
/// Shared representation of every Y collaborative type; the concrete
/// behavior (map, array, text, XML flavors) is selected by `kind`.
#[derive(Debug, Default)]
pub(crate) struct YType {
    /// First item of the list representation (used by list-like types).
    pub start: Somr<Item>,
    /// The item this type is embedded in when nested inside another type.
    pub item: Somr<Item>,
    /// Key → item mapping for the map representation.
    pub map: HashMap<SmolStr, Somr<Item>>,
    /// Cached visible content length.
    pub len: u64,
    /// The tag name of XMLElement and XMLHook type
    pub name: Option<String>,
    /// The name of the type that directly belongs the store.
    pub root_name: Option<String>,
    // Concrete kind; only mutable through `set_kind`.
    kind: YTypeKind,
    /// List search markers; `None` for map kinds (see `YTypeBuilder::markers`).
    pub markers: Option<MarkerList>,
}
/// Shared, cheaply-clonable handle to a [YType] plus a weak link back to the
/// store that owns it.
#[derive(Debug, Default, Clone)]
pub(crate) struct YTypeRef {
    pub store: WeakStoreRef,
    pub inner: Somr<RwLock<YType>>,
}
impl PartialEq for YType {
    fn eq(&self, other: &Self) -> bool {
        // NOTE(review): for two non-root types `root_name` is `None == None`,
        // which makes this first clause true for ANY pair of non-root types —
        // confirm that short-circuit is intended.
        self.root_name == other.root_name
            || (self.start.is_some() && self.start == other.start)
            || self.map == other.map
    }
}

impl PartialEq for YTypeRef {
    fn eq(&self, other: &Self) -> bool {
        // Fast path: same allocation. Otherwise compare the inner types;
        // two released refs compare equal.
        self.inner.ptr_eq(&other.inner)
            || match (self.ty(), other.ty()) {
                (Some(l), Some(r)) => *l == *r,
                (None, None) => true,
                _ => false,
            }
    }
}
impl YType {
    /// Creates a type of `kind`; `tag_name` is only meaningful for the XML
    /// kinds.
    pub fn new(kind: YTypeKind, tag_name: Option<String>) -> Self {
        YType {
            kind,
            name: tag_name,
            ..YType::default()
        }
    }

    /// Current kind of this type.
    pub fn kind(&self) -> YTypeKind {
        self.kind
    }

    /// Upgrades an `Unknown` type to `kind`; fails when the type already has
    /// a different concrete kind.
    pub fn set_kind(&mut self, kind: YTypeKind) -> JwstCodecResult {
        std::debug_assert!(kind != YTypeKind::Unknown);
        if self.kind() == kind {
            return Ok(());
        }
        if self.kind != YTypeKind::Unknown {
            return Err(JwstCodecError::TypeCastError(kind.as_str()));
        }
        self.kind = kind;
        Ok(())
    }
}
impl YTypeRef {
    /// Creates a detached type ref with no owning store.
    pub fn new(kind: YTypeKind, tag_name: Option<String>) -> Self {
        Self {
            inner: Somr::new(RwLock::new(YType::new(kind, tag_name))),
            store: Weak::new(),
        }
    }

    /// Read access to the underlying [YType], if still alive.
    pub fn ty(&self) -> Option<RwLockReadGuard<YType>> {
        self.inner.get().and_then(|ty| ty.read().ok())
    }

    /// Write access to the underlying [YType], if still alive.
    pub fn ty_mut(&self) -> Option<RwLockWriteGuard<YType>> {
        self.inner.get().and_then(|ty| ty.write().ok())
    }

    /// Read access to the owning store, if still alive.
    ///
    /// SAFETY/NOTE(review): the unsafe deref extends the borrow of the store
    /// beyond the locally upgraded `Arc`, which is dropped when this
    /// function returns. This is only sound while another strong reference
    /// (e.g. the owning `Doc`) keeps the store alive for the guard's
    /// lifetime `'a` — confirm that invariant at every call site.
    #[allow(dead_code)]
    pub fn store<'a>(&self) -> Option<RwLockReadGuard<'a, DocStore>> {
        if let Some(store) = self.store.upgrade() {
            let ptr = unsafe { &*Arc::as_ptr(&store) };
            Some(ptr.read().unwrap())
        } else {
            None
        }
    }

    /// Write access to the owning store; same safety caveat as
    /// [Self::store].
    pub fn store_mut<'a>(&self) -> Option<RwLockWriteGuard<'a, DocStore>> {
        if let Some(store) = self.store.upgrade() {
            let ptr = unsafe { &*Arc::as_ptr(&store) };
            Some(ptr.write().unwrap())
        } else {
            None
        }
    }

    /// Acquires store and type read guards together (store lock first).
    #[allow(dead_code)]
    pub fn read(&self) -> Option<(RwLockReadGuard<DocStore>, RwLockReadGuard<YType>)> {
        self
            .store()
            .and_then(|store| self.ty().map(|ty| (store, ty)))
    }

    /// Acquires store and type write guards together (store lock first).
    pub fn write(&self) -> Option<(RwLockWriteGuard<DocStore>, RwLockWriteGuard<YType>)> {
        self
            .store_mut()
            .and_then(|store| self.ty_mut().map(|ty| (store, ty)))
    }
}
/// Builder that fetches or creates a [YTypeRef] inside a store and converts
/// it to a concrete wrapper type.
pub(crate) struct YTypeBuilder {
    store: StoreRef,
    /// The tag name of XMLElement and XMLHook type
    name: Option<String>,
    /// The name of the type that directly belongs the store.
    root_name: Option<String>,
    kind: YTypeKind,
}
impl YTypeBuilder {
    /// Start building a type that will belong to `store`.
    pub fn new(store: StoreRef) -> Self {
        Self {
            store,
            name: None,
            root_name: None,
            kind: YTypeKind::Unknown,
        }
    }
    /// Set the concrete kind of the type to build.
    pub fn with_kind(mut self, kind: YTypeKind) -> Self {
        self.kind = kind;
        self
    }
    /// Register the type under a root name in the store.
    pub fn set_name(mut self, name: String) -> Self {
        self.root_name = Some(name);
        self
    }
    /// Set the XML tag name (only meaningful for XMLElement/XMLHook).
    #[allow(dead_code)]
    pub fn set_tag_name(mut self, tag_name: String) -> Self {
        self.name = Some(tag_name);
        self
    }
    /// Look up an already-registered root type and cast it to `T`.
    /// Fails if `root_name` was never set or the root does not exist.
    pub fn build_exists<T: TryFrom<YTypeRef, Error = JwstCodecError>>(self) -> JwstCodecResult<T> {
        let store = self.store.read().unwrap();
        let ty = if let Some(root_name) = self.root_name {
            match store.types.get(&root_name) {
                Some(ty) => ty.clone(),
                None => {
                    return Err(JwstCodecError::RootStructNotFound(root_name));
                }
            }
        } else {
            return Err(JwstCodecError::TypeCastError("root_name is not set"));
        };
        // release the store lock before the conversion, which may lock again
        drop(store);
        T::try_from(ty)
    }
    /// Get-or-create semantics: fetch the root type if it exists, otherwise
    /// create it. With no `root_name`, the new type is parked in
    /// `dangling_types` (keyed by its allocation address) until integrated.
    pub fn build<T: TryFrom<YTypeRef, Error = JwstCodecError>>(self) -> JwstCodecResult<T> {
        let mut store = self.store.write().unwrap();
        let ty = if let Some(root_name) = self.root_name {
            match store.types.entry(root_name.clone()) {
                Entry::Occupied(e) => e.get().clone(),
                Entry::Vacant(e) => {
                    let inner = Somr::new(RwLock::new(YType {
                        kind: self.kind,
                        name: self.name,
                        root_name: Some(root_name),
                        markers: Self::markers(self.kind),
                        ..Default::default()
                    }));
                    let ty = YTypeRef {
                        store: Arc::downgrade(&self.store),
                        inner,
                    };
                    let ty_ref = ty.clone();
                    e.insert(ty);
                    ty_ref
                }
            }
        } else {
            let inner = Somr::new(RwLock::new(YType {
                kind: self.kind,
                name: self.name,
                root_name: self.root_name.clone(),
                markers: Self::markers(self.kind),
                ..Default::default()
            }));
            let ty = YTypeRef {
                store: Arc::downgrade(&self.store),
                inner,
            };
            let ty_ref = ty.clone();
            store
                .dangling_types
                .insert(ty.inner.ptr().as_ptr() as usize, ty);
            ty_ref
        };
        // release the store write lock before `try_from`, which may lock again
        drop(store);
        T::try_from(ty)
    }
    /// Maps keep no search markers; every list-like kind gets a marker list.
    fn markers(kind: YTypeKind) -> Option<MarkerList> {
        match kind {
            YTypeKind::Map => None,
            _ => Some(MarkerList::new()),
        }
    }
}
/// Generates the `YTypeKind` enum plus `u64` <-> `YTypeKind` conversions from
/// a `{ Variant: wire_id }` list. The wire ids are the y-protocol type refs
/// used on the wire; unknown ids decode to `YTypeKind::Unknown`.
#[macro_export(local_inner_macros)]
macro_rules! impl_variants {
    ({$($name: ident: $codec_ref: literal),*}) => {
        #[derive(Debug, Clone, Copy, PartialEq, Default)]
        pub enum YTypeKind {
            $($name,)*
            #[default]
            Unknown,
        }
        impl YTypeKind {
            /// Stable display name of the kind (variant identifier).
            pub fn as_str(&self) -> &'static str {
                match self {
                    $(YTypeKind::$name => std::stringify!($name),)*
                    YTypeKind::Unknown => "Unknown",
                }
            }
        }
        impl From<u64> for YTypeKind {
            fn from(value: u64) -> Self {
                match value {
                    $($codec_ref => YTypeKind::$name,)*
                    _ => YTypeKind::Unknown,
                }
            }
        }
        impl From<YTypeKind> for u64 {
            fn from(value: YTypeKind) -> Self {
                // Unknown has no wire id; encoding it is a caller bug.
                std::debug_assert!(value != YTypeKind::Unknown);
                match value {
                    $(YTypeKind::$name => $codec_ref,)*
                    _ => std::unreachable!(),
                }
            }
        }
    };
}
/// Uniform access to the wrapped `YTypeRef` of the public type wrappers
/// generated by `impl_type!`.
pub(crate) trait AsInner {
    type Inner;
    fn as_inner(&self) -> &Self::Inner;
}
/// Generates a public newtype wrapper (`Text`, `Map`, …) around `YTypeRef`,
/// with `AsInner`, a checked `TryFrom<YTypeRef>` cast, an unchecked
/// constructor, and a `Value` conversion.
#[macro_export(local_inner_macros)]
macro_rules! impl_type {
    ($name: ident) => {
        #[derive(Debug, Clone, PartialEq)]
        pub struct $name(pub(crate) super::YTypeRef);
        // SAFETY(review): Send/Sync are asserted for every generated wrapper
        // on the assumption that YTypeRef's interior is fully lock-guarded —
        // verify that Somr + RwLock actually uphold this.
        unsafe impl Sync for $name {}
        unsafe impl Send for $name {}
        impl $name {
            pub(crate) fn new(inner: super::YTypeRef) -> Self {
                Self(inner)
            }
        }
        impl super::AsInner for $name {
            type Inner = super::YTypeRef;
            #[inline(always)]
            fn as_inner(&self) -> &Self::Inner {
                &self.0
            }
        }
        impl TryFrom<super::YTypeRef> for $name {
            type Error = $crate::JwstCodecError;
            /// Checked cast: succeeds for a matching kind, promotes an
            /// `Unknown` type in place, and fails for any other kind or when
            /// the underlying store/type locks cannot be acquired.
            fn try_from(value: super::YTypeRef) -> Result<Self, Self::Error> {
                if let Some((_, mut inner)) = value.write() {
                    match inner.kind {
                        super::YTypeKind::$name => Ok($name::new(value.clone())),
                        super::YTypeKind::Unknown => {
                            inner.set_kind(super::YTypeKind::$name)?;
                            Ok($name::new(value.clone()))
                        }
                        _ => Err($crate::JwstCodecError::TypeCastError(std::stringify!(
                            $name
                        ))),
                    }
                } else {
                    Err($crate::JwstCodecError::TypeCastError(std::stringify!(
                        $name
                    )))
                }
            }
        }
        impl $name {
            /// Unchecked cast: caller guarantees the kind already matches.
            pub(crate) fn from_unchecked(value: super::YTypeRef) -> Self {
                $name::new(value.clone())
            }
        }
        impl From<$name> for super::Value {
            fn from(value: $name) -> Self {
                Self::$name(value)
            }
        }
    };
}
// Wire ids follow the Yjs type-ref table so encoded documents interoperate.
impl_variants!({
    Array: 0,
    Map: 1,
    Text: 2,
    XMLElement: 3,
    XMLFragment: 4,
    XMLHook: 5,
    XMLText: 6
    // Doc: 9?
});

View File

@@ -0,0 +1,293 @@
use std::fmt::Display;
use super::list::ListType;
use crate::{impl_type, Content, JwstCodecResult};
// `Text` wraps a YTypeRef; its sequence behavior comes from `ListType`.
impl_type!(Text);
impl ListType for Text {}
impl Text {
    /// Total content length as tracked by the underlying list CRDT.
    #[inline]
    pub fn len(&self) -> u64 {
        self.content_len()
    }
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
    /// Insert `str` at `char_index`; accepts anything stringifiable
    /// (`&str`, `String`, `char`, …).
    #[inline]
    pub fn insert<T: ToString>(&mut self, char_index: u64, str: T) -> JwstCodecResult {
        self.insert_at(char_index, Content::String(str.to_string()))
    }
    /// Remove `len` units of content starting at `char_index`.
    #[inline]
    pub fn remove(&mut self, char_index: u64, len: u64) -> JwstCodecResult {
        self.remove_at(char_index, len)
    }
}
impl Display for Text {
    /// Concatenates every string chunk of the text in document order;
    /// non-string content items are skipped.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for item in self.iter_item() {
            if let Content::String(chunk) = &item.get().unwrap().content {
                f.write_str(chunk)?;
            }
        }
        Ok(())
    }
}
impl serde::Serialize for Text {
    /// Serialize the text as its plain-string rendering (via `Display`).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.to_string())
    }
}
#[cfg(test)]
mod tests {
    // Tests for Text: basic editing, parallel edits, and interop with yrs.
    // Models marked with `loom_model!` also run under the loom scheduler.
    use rand::{Rng, SeedableRng};
    use rand_chacha::ChaCha20Rng;
    use yrs::{Options, Text, Transact};
    #[cfg(not(loom))]
    use crate::sync::{Arc, AtomicUsize, Ordering};
    use crate::{loom_model, sync::thread, Doc};
    #[test]
    fn test_manipulate_text() {
        loom_model!({
            let doc = Doc::new();
            let mut text = doc.create_text().unwrap();
            text.insert(0, "llo").unwrap();
            text.insert(0, "he").unwrap();
            text.insert(5, " world").unwrap();
            text.insert(6, "great ").unwrap();
            text.insert(17, '!').unwrap();
            assert_eq!(text.to_string(), "hello great world!");
            assert_eq!(text.len(), 18);
            text.remove(4, 4).unwrap();
            assert_eq!(text.to_string(), "helleat world!");
            assert_eq!(text.len(), 14);
        });
    }
    #[test]
    #[cfg(not(loom))]
    fn test_parallel_insert_text() {
        // NOTE(review): the seed is random but never logged, so a failing run
        // cannot be reproduced — consider printing it on entry.
        let seed = rand::rng().random();
        let rand = ChaCha20Rng::seed_from_u64(seed);
        let mut handles = Vec::new();
        let doc = Doc::with_client(1);
        let mut text = doc.get_or_create_text("test").unwrap();
        text.insert(0, "This is a string with length 32.").unwrap();
        let added_len = Arc::new(AtomicUsize::new(32));
        // parallel editing text
        {
            for i in 0..2 {
                let mut text = text.clone();
                let mut rand = rand.clone();
                let len = added_len.clone();
                handles.push(thread::spawn(move || {
                    for j in 0..10 {
                        let pos = rand.random_range(0..text.len());
                        let string = format!("hello {}", i * j);
                        text.insert(pos, &string).unwrap();
                        len.fetch_add(string.len(), Ordering::SeqCst);
                    }
                }));
            }
        }
        // parallel editing doc
        {
            for i in 0..2 {
                let doc = doc.clone();
                let mut rand = rand.clone();
                let len = added_len.clone();
                handles.push(thread::spawn(move || {
                    let mut text = doc.get_or_create_text("test").unwrap();
                    for j in 0..10 {
                        let pos = rand.random_range(0..text.len());
                        let string = format!("hello doc{}", i * j);
                        text.insert(pos, &string).unwrap();
                        len.fetch_add(string.len(), Ordering::SeqCst);
                    }
                }));
            }
        }
        for handle in handles {
            handle.join().unwrap();
        }
        // total length must equal the initial 32 bytes plus all insertions
        assert_eq!(text.to_string().len(), added_len.load(Ordering::SeqCst));
        assert_eq!(text.len(), added_len.load(Ordering::SeqCst) as u64);
    }
    // Deterministic ins/del stress driver used by the regression cases below.
    #[cfg(not(loom))]
    fn parallel_ins_del_text(seed: u64, thread: i32, iteration: i32) {
        let doc = Doc::with_client(1);
        let rand = ChaCha20Rng::seed_from_u64(seed);
        let mut text = doc.get_or_create_text("test").unwrap();
        text.insert(0, "This is a string with length 32.").unwrap();
        let mut handles = Vec::new();
        let len = Arc::new(AtomicUsize::new(32));
        for i in 0..thread {
            let len = len.clone();
            let mut rand = rand.clone();
            let text = text.clone();
            handles.push(thread::spawn(move || {
                for j in 0..iteration {
                    let len = len.clone();
                    let mut text = text.clone();
                    // even-numbered threads insert, odd-numbered threads delete
                    let ins = i % 2 == 0;
                    let pos = rand.random_range(0..16);
                    if ins {
                        let str = format!("hello {}", i * j);
                        text.insert(pos, &str).unwrap();
                        len.fetch_add(str.len(), Ordering::SeqCst);
                    } else {
                        text.remove(pos, 6).unwrap();
                        len.fetch_sub(6, Ordering::SeqCst);
                    }
                }
            }));
        }
        for handle in handles {
            handle.join().unwrap();
        }
        assert_eq!(text.to_string().len(), len.load(Ordering::SeqCst));
        assert_eq!(text.len(), len.load(Ordering::SeqCst) as u64);
    }
    #[test]
    #[cfg(not(loom))]
    fn test_parallel_ins_del_text() {
        // cases that ever broken
        // wrong left/right ref
        parallel_ins_del_text(973078538, 2, 2);
        parallel_ins_del_text(18414938500869652479, 2, 2);
    }
    #[test]
    fn loom_parallel_ins_del_text() {
        // NOTE(review): seed is random and unlogged here as well.
        let seed = rand::rng().random();
        let mut rand = ChaCha20Rng::seed_from_u64(seed);
        // pre-generate positions outside the loom model so each replay of the
        // model sees identical inputs
        let ranges = (0..20)
            .map(|_| rand.random_range(0..16))
            .collect::<Vec<_>>();
        loom_model!({
            let doc = Doc::new();
            let mut text = doc.get_or_create_text("test").unwrap();
            text.insert(0, "This is a string with length 32.").unwrap();
            // enough for loom
            let handles = (0..2)
                .map(|i| {
                    let text = text.clone();
                    let ranges = ranges.clone();
                    thread::spawn(move || {
                        let mut text = text.clone();
                        let ins = i % 2 == 0;
                        let pos = ranges[i];
                        if ins {
                            let str = format!("hello {}", i);
                            text.insert(pos, &str).unwrap();
                        } else {
                            text.remove(pos, 6).unwrap();
                        }
                    })
                })
                .collect::<Vec<_>>();
            for handle in handles {
                handle.join().unwrap();
            }
        });
    }
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_recover_from_yjs_encoder() {
        let yrs_options = Options {
            client_id: rand::random(),
            guid: nanoid::nanoid!().into(),
            ..Default::default()
        };
        loom_model!({
            // produce an update with the reference yrs implementation …
            let binary = {
                let doc = yrs::Doc::with_options(yrs_options.clone());
                let text = doc.get_or_insert_text("greating");
                let mut trx = doc.transact_mut();
                text.insert(&mut trx, 0, "hello");
                text.insert(&mut trx, 5, " world!");
                text.remove_range(&mut trx, 11, 1);
                trx.encode_update_v1()
            };
            // … and make sure y-octo decodes and continues editing it
            // in loom loop
            #[allow(clippy::needless_borrow)]
            let doc = Doc::try_from_binary_v1(&binary).unwrap();
            let mut text = doc.get_or_create_text("greating").unwrap();
            assert_eq!(text.to_string(), "hello world");
            text.insert(6, "great ").unwrap();
            text.insert(17, '!').unwrap();
            assert_eq!(text.to_string(), "hello great world!");
        });
    }
    #[test]
    fn test_recover_from_octobase_encoder() {
        loom_model!({
            // round-trip through y-octo's own encoder
            let binary = {
                let doc = Doc::new();
                let mut text = doc.get_or_create_text("greating").unwrap();
                text.insert(0, "hello").unwrap();
                text.insert(5, " world!").unwrap();
                text.remove(11, 1).unwrap();
                doc.encode_update_v1().unwrap()
            };
            let doc = Doc::try_from_binary_v1(binary).unwrap();
            let mut text = doc.get_or_create_text("greating").unwrap();
            assert_eq!(text.to_string(), "hello world");
            text.insert(6, "great ").unwrap();
            text.insert(17, '!').unwrap();
            assert_eq!(text.to_string(), "hello great world!");
        });
    }
}

View File

@@ -0,0 +1,159 @@
use std::fmt::Display;
use super::*;
/// Dynamically-typed view of any content a document can hold: either a plain
/// `Any` scalar/collection or one of the shared CRDT container types.
#[derive(Debug, PartialEq)]
pub enum Value {
    Any(Any),
    Doc(Doc),
    Array(Array),
    Map(Map),
    Text(Text),
    XMLElement(XMLElement),
    XMLFragment(XMLFragment),
    XMLHook(XMLHook),
    XMLText(XMLText),
}
impl Value {
    /// Clone out the `Any` payload, or `None` for container variants.
    pub fn to_any(&self) -> Option<Any> {
        if let Value::Any(any) = self {
            Some(any.clone())
        } else {
            None
        }
    }

    /// Clone out the `Array` payload, or `None` for other variants.
    pub fn to_array(&self) -> Option<Array> {
        if let Value::Array(array) = self {
            Some(array.clone())
        } else {
            None
        }
    }

    /// Clone out the `Map` payload, or `None` for other variants.
    pub fn to_map(&self) -> Option<Map> {
        if let Value::Map(map) = self {
            Some(map.clone())
        } else {
            None
        }
    }

    /// Clone out the `Text` payload, or `None` for other variants.
    pub fn to_text(&self) -> Option<Text> {
        if let Value::Text(text) = self {
            Some(text.clone())
        } else {
            None
        }
    }

    /// Wrap a vector of `Any`-convertible items as `Value::Any(Any::Array)`.
    pub fn from_vec<T: Into<Any>>(el: Vec<T>) -> Self {
        let items = el.into_iter().map(Into::into).collect::<Vec<_>>();
        Value::Any(Any::Array(items))
    }
}
impl From<&Content> for Value {
    /// Lift raw item content into the public `Value` view.
    fn from(value: &Content) -> Value {
        match value {
            // a single-element Any list is unwrapped to the bare value
            Content::Any(any) => Value::Any(if any.len() == 1 {
                any[0].clone()
            } else {
                Any::Array(any.clone())
            }),
            Content::String(s) => Value::Any(Any::String(s.clone())),
            // Json slots may be absent; map holes to Undefined
            Content::Json(json) => Value::Any(Any::Array(
                json
                    .iter()
                    .map(|item| {
                        if let Some(s) = item {
                            Any::String(s.clone())
                        } else {
                            Any::Undefined
                        }
                    })
                    .collect::<Vec<_>>(),
            )),
            Content::Binary(buf) => Value::Any(Any::Binary(buf.clone())),
            Content::Embed(v) => Value::Any(v.clone()),
            // NOTE(review): `ty.ty().unwrap()` panics if the type allocation
            // is gone or its lock is poisoned — confirm that cannot happen
            // for contents reachable from a live document.
            Content::Type(ty) => match ty.ty().unwrap().kind {
                YTypeKind::Array => Value::Array(Array::from_unchecked(ty.clone())),
                YTypeKind::Map => Value::Map(Map::from_unchecked(ty.clone())),
                YTypeKind::Text => Value::Text(Text::from_unchecked(ty.clone())),
                YTypeKind::XMLElement => Value::XMLElement(XMLElement::from_unchecked(ty.clone())),
                YTypeKind::XMLFragment => Value::XMLFragment(XMLFragment::from_unchecked(ty.clone())),
                YTypeKind::XMLHook => Value::XMLHook(XMLHook::from_unchecked(ty.clone())),
                YTypeKind::XMLText => Value::XMLText(XMLText::from_unchecked(ty.clone())),
                // actually unreachable
                YTypeKind::Unknown => Value::Any(Any::Undefined),
            },
            Content::Doc { guid: _, opts } => Value::Doc(
                DocOptions::try_from(opts.clone())
                    .expect("Failed to parse doc options")
                    .build(),
            ),
            Content::Format { .. } => unimplemented!(),
            // actually unreachable
            Content::Deleted(_) => Value::Any(Any::Undefined),
        }
    }
}
impl From<Value> for Content {
    /// Inverse of `From<&Content>`: pack a `Value` back into item content.
    /// Container variants hand over their inner `YTypeRef` by move.
    fn from(value: Value) -> Self {
        match value {
            Value::Any(any) => Content::from(any),
            Value::Doc(doc) => Content::Doc {
                guid: doc.guid().to_owned(),
                opts: Any::from(doc.options().clone()),
            },
            Value::Array(v) => Content::Type(v.0),
            Value::Map(v) => Content::Type(v.0),
            Value::Text(v) => Content::Type(v.0),
            Value::XMLElement(v) => Content::Type(v.0),
            Value::XMLFragment(v) => Content::Type(v.0),
            Value::XMLHook(v) => Content::Type(v.0),
            Value::XMLText(v) => Content::Type(v.0),
        }
    }
}
// Anything convertible to `Any` becomes `Value::Any` for free.
impl<T: Into<Any>> From<T> for Value {
    fn from(value: T) -> Self {
        Value::Any(value.into())
    }
}
impl From<Doc> for Value {
    fn from(value: Doc) -> Self {
        Value::Doc(value)
    }
}
impl Display for Value {
    /// Only `Any` and `Text` have a textual rendering; every other variant
    /// formats as the empty string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Any(any) => Display::fmt(any, f),
            Value::Text(text) => Display::fmt(text, f),
            _ => Ok(()),
        }
    }
}
impl serde::Serialize for Value {
    /// Delegate to the inner value's serializer. Variants without a
    /// serializer yet (XML types, Doc) serialize as `null`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        match self {
            Self::Any(any) => any.serialize(serializer),
            Self::Array(array) => array.serialize(serializer),
            Self::Map(map) => map.serialize(serializer),
            Self::Text(text) => text.serialize(serializer),
            // Self::XMLElement(xml_element) => xml_element.serialize(serializer),
            // Self::XMLFragment(xml_fragment) => xml_fragment.serialize(serializer),
            // Self::XMLHook(xml_hook) => xml_hook.serialize(serializer),
            // Self::XMLText(xml_text) => xml_text.serialize(serializer),
            // Self::Doc(doc) => doc.serialize(serializer),
            _ => serializer.serialize_none(),
        }
    }
}

View File

@@ -0,0 +1,14 @@
use super::list::ListType;
use crate::impl_type;
// All XML node kinds are thin wrappers sharing list semantics via `ListType`.
impl_type!(XMLElement);
impl ListType for XMLElement {}
impl_type!(XMLFragment);
impl ListType for XMLFragment {}
impl_type!(XMLText);
impl ListType for XMLText {}
impl_type!(XMLHook);
impl ListType for XMLHook {}

View File

@@ -0,0 +1,28 @@
use super::*;
/// Wrap awareness states into a complete y-sync protocol message buffer.
pub fn encode_awareness_as_message(awareness: AwarenessStates) -> JwstCodecResult<Vec<u8>> {
    let mut buffer = Vec::new();
    write_sync_message(&mut buffer, &SyncMessage::Awareness(awareness))
        .map_err(|e| JwstCodecError::InvalidWriteBuffer(e.to_string()))?;
    Ok(buffer)
}
/// Wrap a raw v1 document update into a complete y-sync protocol message.
pub fn encode_update_as_message(update: Vec<u8>) -> JwstCodecResult<Vec<u8>> {
    let mut buffer = Vec::new();
    write_sync_message(&mut buffer, &SyncMessage::Doc(DocMessage::Update(update)))
        .map_err(|e| JwstCodecError::InvalidWriteBuffer(e.to_string()))?;
    Ok(buffer)
}
/// Decode a batch of v1 update binaries and merge them into a single
/// `Update`. Fails on the first binary that does not decode.
pub fn merge_updates_v1<V: AsRef<[u8]>, I: IntoIterator<Item = V>>(
    updates: I,
) -> JwstCodecResult<Update> {
    let updates = updates
        .into_iter()
        .map(Update::decode_v1)
        .collect::<JwstCodecResult<Vec<_>>>()?;
    Ok(Update::merge(updates))
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1,67 @@
#[forbid(unsafe_code)]
mod codec;
mod doc;
mod protocol;
mod sync;
pub use codec::*;
pub use doc::{
encode_awareness_as_message, encode_update_as_message, merge_updates_v1, Any, Array, Awareness,
AwarenessEvent, Client, ClientMap, Clock, CrdtRead, CrdtReader, CrdtWrite, CrdtWriter, Doc,
DocOptions, HashMap as AHashMap, HashMapExt, History, HistoryOptions, Id, Map, RawDecoder,
RawEncoder, StateVector, StoreHistory, Text, Update, Value,
};
pub(crate) use doc::{Content, Item};
use log::{debug, warn};
use nom::IResult;
pub use protocol::{
read_sync_message, write_sync_message, AwarenessState, AwarenessStates, DocMessage, SyncMessage,
SyncMessageScanner,
};
use thiserror::Error;
/// Every failure mode of the y-octo codec and CRDT engine. Display strings
/// come from the `#[error]` attributes via `thiserror`.
#[derive(Debug, Error, PartialEq)]
pub enum JwstCodecError {
    #[error("Unexpected Scenario")]
    Unexpected,
    #[error("Damaged document: corrupt json data")]
    DamagedDocumentJson,
    #[error("Incomplete document: {0}")]
    IncompleteDocument(String),
    #[error("Invalid write buffer: {0}")]
    InvalidWriteBuffer(String),
    #[error("Content does not support splitting in {0}")]
    ContentSplitNotSupport(u64),
    #[error("GC or Skip does not support splitting")]
    ItemSplitNotSupport,
    #[error("update is empty")]
    UpdateIsEmpty,
    #[error("invalid update")]
    UpdateInvalid(#[from] nom::Err<nom::error::Error<usize>>),
    #[error("update not fully consumed: {0}")]
    UpdateNotFullyConsumed(usize),
    #[error("invalid struct clock, expect {expect}, actually {actually}")]
    StructClockInvalid { expect: u64, actually: u64 },
    #[error("cannot find struct {clock} in {client_id}")]
    StructSequenceInvalid { client_id: u64, clock: u64 },
    #[error("struct {0} not exists")]
    StructSequenceNotExists(u64),
    #[error("Invalid parent")]
    InvalidParent,
    #[error("Parent not found")]
    ParentNotFound,
    #[error("Invalid struct type, expect item, actually {0}")]
    InvalidStructType(&'static str),
    #[error("Can not cast known type to {0}")]
    TypeCastError(&'static str),
    #[error("Can not found root struct with name: {0}")]
    RootStructNotFound(String),
    #[error("Index {0} out of bound")]
    IndexOutOfBound(u64),
    #[error("Document has been released")]
    DocReleased,
    #[error("Unexpected type, expect {0}")]
    UnexpectedType(&'static str),
}
/// Convenience alias; most codec APIs return `()` on success.
pub type JwstCodecResult<T = ()> = Result<T, JwstCodecError>;

View File

@@ -0,0 +1,151 @@
use nom::{multi::count, Parser};
use super::*;
// Sentinel payload marking a deleted awareness entry (JSON `null`).
const NULL_STR: &str = "null";
/// Per-client awareness entry: a lamport-style clock plus an opaque payload.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub struct AwarenessState {
    // clocks are kept within u32 range in tests to match the wire format
    #[cfg_attr(test, proptest(strategy = "0..u32::MAX as u64"))]
    pub(crate) clock: u64,
    // content is usually a json
    pub(crate) content: String,
}
impl AwarenessState {
    /// Build a state with an explicit clock and payload.
    pub fn new(clock: u64, content: String) -> Self {
        Self { clock, content }
    }

    /// Current logical clock of this client's state.
    pub fn clock(&self) -> u64 {
        self.clock
    }

    /// Raw payload (usually a JSON document).
    pub fn content(&self) -> &str {
        self.content.as_str()
    }

    /// A payload equal to the literal `"null"` marks a deleted entry.
    pub fn is_deleted(&self) -> bool {
        self.content == NULL_STR
    }

    /// Bump the clock by one.
    pub(crate) fn add_clock(&mut self) {
        self.clock += 1;
    }

    /// Overwrite the clock (used when applying remote states).
    pub(crate) fn set_clock(&mut self, clock: u64) {
        self.clock = clock;
    }

    /// Replace the payload, bumping the clock first so peers observe it.
    pub fn set_content(&mut self, content: String) {
        self.add_clock();
        self.content = content;
    }

    /// Mark this entry deleted by writing the `"null"` sentinel payload.
    pub fn delete(&mut self) {
        self.set_content(String::from(NULL_STR));
    }
}
impl Default for AwarenessState {
    /// Fresh entries start at clock 0 with the `"null"` (deleted) payload.
    fn default() -> Self {
        Self::new(0, NULL_STR.to_string())
    }
}
/// Parse one `(client_id, state)` record: var-int id, var-int clock,
/// length-prefixed payload string.
fn read_awareness_state(input: &[u8]) -> IResult<&[u8], (u64, AwarenessState)> {
    let (tail, client_id) = read_var_u64(input)?;
    let (tail, clock) = read_var_u64(tail)?;
    let (tail, content) = read_var_string(tail)?;
    Ok((tail, (client_id, AwarenessState { clock, content })))
}
/// Encode one `(client_id, state)` record, mirroring `read_awareness_state`.
fn write_awareness_state<W: Write>(
    buffer: &mut W,
    client_id: u64,
    state: &AwarenessState,
) -> Result<(), IoError> {
    write_var_u64(buffer, client_id)?;
    write_var_u64(buffer, state.clock)?;
    // NOTE(review): the clone looks avoidable if `write_var_string` accepts
    // `impl AsRef<str>` — check its signature.
    write_var_string(buffer, state.content.clone())?;
    Ok(())
}
/// All known client states, keyed by client id.
pub type AwarenessStates = HashMap<u64, AwarenessState>;
/// Parse a count-prefixed list of awareness records into a map.
/// Duplicate client ids keep the last record seen.
pub fn read_awareness(input: &[u8]) -> IResult<&[u8], AwarenessStates> {
    let (tail, len) = read_var_u64(input)?;
    let (tail, messages) = count(read_awareness_state, len as usize).parse(tail)?;
    Ok((tail, messages.into_iter().collect()))
}
/// Encode a client-state map as a count-prefixed record list.
/// Iteration order of the map is unspecified, so byte output is not stable
/// across calls — only round-trip equality is guaranteed.
pub fn write_awareness<W: Write>(buffer: &mut W, clients: &AwarenessStates) -> Result<(), IoError> {
    write_var_u64(buffer, clients.len() as u64)?;
    for (client_id, state) in clients {
        write_awareness_state(buffer, *client_id, state)?;
    }
    Ok(())
}
// TODO(@darkskygit): impl reader/writer
// awareness state message
/// Placeholder for a dedicated awareness-message type; currently unused.
#[allow(dead_code)]
#[derive(Debug, PartialEq)]
pub struct AwarenessMessage {
    clients: AwarenessStates,
}
#[cfg(test)]
mod tests {
    use super::*;
    // Round-trip test against a hand-crafted wire buffer.
    #[test]
    fn test_awareness() {
        let input = [
            3, // count of state
            1, 5, 1, 1, // first state
            2, 10, 2, 2, 3, // second state
            5, 5, 5, 1, 2, 3, 4, 5, // third state
        ];
        let expected = HashMap::from([
            (
                1,
                AwarenessState::new(5, String::from_utf8(vec![1]).unwrap()),
            ),
            (
                2,
                AwarenessState::new(10, String::from_utf8(vec![2, 3]).unwrap()),
            ),
            (
                5,
                AwarenessState::new(5, String::from_utf8(vec![1, 2, 3, 4, 5]).unwrap()),
            ),
        ]);
        {
            let (tail, result) = read_awareness(&input).unwrap();
            assert!(tail.is_empty());
            assert_eq!(result, expected);
        }
        {
            let mut buffer = Vec::new();
            // hashmap has not a ordered keys, so buffer not equal each write
            // we need re-parse the buffer to check result
            write_awareness(&mut buffer, &expected).unwrap();
            let (tail, result) = read_awareness(&buffer).unwrap();
            assert!(tail.is_empty());
            assert_eq!(result, expected);
        }
    }
}

View File

@@ -0,0 +1,103 @@
use super::*;
// doc sync message
// doc sync message
/// The three steps of the y-sync document protocol; payloads are kept as raw
/// bytes and decoded lazily by the caller.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum DocMessage {
    // state vector
    // TODO: temporarily skipped in the test, because yrs decoding needs to ensure that the update
    // in step1 is the correct state vector binary and any data can be included in our
    // implementation (we will ensure the correctness of encoding and decoding in the subsequent
    // decoding process)
    #[cfg_attr(test, proptest(skip))]
    Step1(Vec<u8>),
    // update
    Step2(Vec<u8>),
    // update
    Update(Vec<u8>),
}
// Wire tags of the doc sub-protocol (must match y-protocols/sync).
const DOC_MESSAGE_STEP1: u64 = 0;
const DOC_MESSAGE_STEP2: u64 = 1;
const DOC_MESSAGE_UPDATE: u64 = 2;
pub fn read_doc_message(input: &[u8]) -> IResult<&[u8], DocMessage> {
let (tail, step) = read_var_u64(input)?;
match step {
DOC_MESSAGE_STEP1 => {
let (tail, sv) = read_var_buffer(tail)?;
// TODO: decode state vector
Ok((tail, DocMessage::Step1(sv.into())))
}
DOC_MESSAGE_STEP2 => {
let (tail, update) = read_var_buffer(tail)?;
// TODO: decode update
Ok((tail, DocMessage::Step2(update.into())))
}
DOC_MESSAGE_UPDATE => {
let (tail, update) = read_var_buffer(tail)?;
// TODO: decode update
Ok((tail, DocMessage::Update(update.into())))
}
_ => Err(nom::Err::Error(Error::new(input, ErrorKind::Tag))),
}
}
/// Encode one doc message as its step tag plus a length-prefixed payload,
/// mirroring `read_doc_message`.
pub fn write_doc_message<W: Write>(buffer: &mut W, msg: &DocMessage) -> Result<(), IoError> {
    let (tag, payload) = match msg {
        DocMessage::Step1(sv) => (DOC_MESSAGE_STEP1, sv),
        DocMessage::Step2(update) => (DOC_MESSAGE_STEP2, update),
        DocMessage::Update(update) => (DOC_MESSAGE_UPDATE, update),
    };
    write_var_u64(buffer, tag)?;
    write_var_buffer(buffer, payload)?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    // Round-trip every variant, then confirm an unknown tag is rejected.
    #[test]
    fn test_doc_message() {
        let messages = [
            DocMessage::Step1(vec![0x01, 0x02, 0x03]),
            DocMessage::Step2(vec![0x04, 0x05, 0x06]),
            DocMessage::Update(vec![0x07, 0x08, 0x09]),
        ];
        for msg in messages {
            let mut buffer = Vec::new();
            write_doc_message(&mut buffer, &msg).unwrap();
            let (tail, decoded) = read_doc_message(&buffer).unwrap();
            assert_eq!(tail.len(), 0);
            assert_eq!(decoded, msg);
        }
        // test invalid msg
        {
            let mut buffer = Vec::new();
            let msg = DocMessage::Step1(vec![0x01, 0x02, 0x03]);
            write_doc_message(&mut buffer, &msg).unwrap();
            buffer[0] = 0xff; // Inject error in message tag
            let res = read_doc_message(&buffer);
            match res.as_ref().unwrap_err() {
                nom::Err::Error(error) => assert_eq!(error.code, ErrorKind::Tag),
                _ => panic!("Expected error ErrorKind::Tag, but got {:?}", res),
            }
        }
    }
}

View File

@@ -0,0 +1,23 @@
mod awareness;
mod doc;
mod scanner;
mod sync;
use std::{
collections::HashMap,
io::{Error as IoError, Write},
};
use awareness::{read_awareness, write_awareness};
pub use awareness::{AwarenessState, AwarenessStates};
pub use doc::DocMessage;
use doc::{read_doc_message, write_doc_message};
use log::debug;
use nom::{
error::{Error, ErrorKind},
IResult,
};
pub use scanner::SyncMessageScanner;
pub use sync::{read_sync_message, write_sync_message, SyncMessage};
use super::*;

View File

@@ -0,0 +1,64 @@
use super::*;
/// Iterator that decodes consecutive `SyncMessage`s from a byte slice,
/// stopping cleanly at end-of-buffer or incomplete trailing data.
pub struct SyncMessageScanner<'a> {
    buffer: &'a [u8],
}
impl SyncMessageScanner<'_> {
pub fn new(buffer: &[u8]) -> SyncMessageScanner {
SyncMessageScanner { buffer }
}
}
impl<'a> Iterator for SyncMessageScanner<'a> {
    type Item = Result<SyncMessage, nom::Err<nom::error::Error<&'a [u8]>>>;
    /// Yield the next decoded message, advancing past its bytes.
    /// Incomplete/EOF conditions terminate iteration silently; any other
    /// parse error is surfaced as an `Err` item.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffer.is_empty() {
            return None;
        }
        match read_sync_message(self.buffer) {
            Ok((tail, message)) => {
                self.buffer = tail;
                Some(Ok(message))
            }
            Err(nom::Err::Incomplete(_))
            | Err(nom::Err::Error(nom::error::Error {
                code: nom::error::ErrorKind::Eof,
                ..
            }))
            | Err(nom::Err::Failure(nom::error::Error {
                code: nom::error::ErrorKind::Eof,
                ..
            })) => {
                debug!("incomplete sync message");
                None
            }
            Err(e) => Some(Err(e)),
        }
    }
}
#[cfg(test)]
mod tests {
    use proptest::{collection::vec, prelude::*};
    use super::*;
    // Property: any sequence of messages written back-to-back scans back to
    // the identical sequence.
    proptest! {
        #[test]
        #[cfg_attr(miri, ignore)]
        fn test_sync_message_scanner(messages in vec(any::<SyncMessage>(), 0..10)) {
            let mut buffer = Vec::new();
            for message in &messages {
                write_sync_message(&mut buffer, message).unwrap();
            }
            let result: Result<Vec<SyncMessage>, _> = SyncMessageScanner::new(&buffer).collect();
            assert_eq!(result.unwrap(), messages);
        }
    }
}

View File

@@ -0,0 +1,165 @@
use byteorder::WriteBytesExt;
use super::*;
/// Internal discriminator for the top-level sync message tag.
#[derive(Debug, Clone, PartialEq)]
enum MessageType {
    Auth,
    Awareness,
    AwarenessQuery,
    Doc,
}
/// Decode the message-type tag; unknown values produce a `Tag` error
/// anchored at the start of `input`.
fn read_sync_tag(input: &[u8]) -> IResult<&[u8], MessageType> {
    let (tail, tag) = read_var_u64(input)?;
    let tag = match tag {
        0 => MessageType::Doc,
        1 => MessageType::Awareness,
        2 => MessageType::Auth,
        3 => MessageType::AwarenessQuery,
        _ => return Err(nom::Err::Error(Error::new(input, ErrorKind::Tag))),
    };
    Ok((tail, tag))
}
/// Encode the message-type tag; ids must stay in lock-step with
/// `read_sync_tag`.
fn write_sync_tag<W: Write>(buffer: &mut W, tag: MessageType) -> Result<(), IoError> {
    let id: u64 = match tag {
        MessageType::Doc => 0,
        MessageType::Awareness => 1,
        MessageType::Auth => 2,
        MessageType::AwarenessQuery => 3,
    };
    write_var_u64(buffer, id)
}
// sync message
/// Top-level y-sync protocol message. `Auth(None)` means permission granted;
/// `Auth(Some(reason))` carries the denial reason.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
pub enum SyncMessage {
    Auth(Option<String>),
    Awareness(AwarenessStates),
    AwarenessQuery,
    Doc(DocMessage),
}
/// Decode one complete sync message from `input`, returning the unconsumed
/// tail and the parsed [`SyncMessage`].
///
/// Awareness payloads arrive as a nested length-prefixed buffer; trailing
/// bytes inside that buffer indicate a malformed peer update and are logged
/// (and rejected in debug builds) but tolerated in release builds.
pub fn read_sync_message(input: &[u8]) -> IResult<&[u8], SyncMessage> {
    let (tail, tag) = read_sync_tag(input)?;
    let (tail, message) = match tag {
        MessageType::Doc => {
            let (tail, doc) = read_doc_message(tail)?;
            (tail, SyncMessage::Doc(doc))
        }
        MessageType::Awareness => {
            let (tail, update) = read_var_buffer(tail)?;
            let (awareness_tail, awareness) = read_awareness(update)?;
            let tail_len = awareness_tail.len();
            if tail_len > 0 {
                debug!("awareness update has trailing bytes: {}", tail_len);
                // Fixed: the original asserted `tail_len > 0` here, which is
                // vacuously true inside this branch and could never fire; the
                // intent was to flag trailing bytes in debug builds.
                debug_assert_eq!(tail_len, 0, "awareness update has trailing bytes");
            }
            (tail, SyncMessage::Awareness(awareness))
        }
        MessageType::Auth => {
            // 1 => granted (no reason); anything else carries a denial reason
            let (tail, success) = read_var_u64(tail)?;
            if success == 1 {
                (tail, SyncMessage::Auth(None))
            } else {
                let (tail, reason) = read_var_string(tail)?;
                (tail, SyncMessage::Auth(Some(reason)))
            }
        }
        MessageType::AwarenessQuery => (tail, SyncMessage::AwarenessQuery),
    };
    Ok((tail, message))
}
/// Encode one sync message, mirroring `read_sync_message`.
pub fn write_sync_message<W: Write>(buffer: &mut W, msg: &SyncMessage) -> Result<(), IoError> {
    match msg {
        SyncMessage::Auth(reason) => {
            const PERMISSION_DENIED: u8 = 0;
            const PERMISSION_GRANTED: u8 = 1;
            write_sync_tag(buffer, MessageType::Auth)?;
            if let Some(reason) = reason {
                buffer.write_u8(PERMISSION_DENIED)?;
                write_var_string(buffer, reason)?;
            } else {
                buffer.write_u8(PERMISSION_GRANTED)?;
            }
        }
        SyncMessage::AwarenessQuery => {
            write_sync_tag(buffer, MessageType::AwarenessQuery)?;
        }
        SyncMessage::Awareness(awareness) => {
            write_sync_tag(buffer, MessageType::Awareness)?;
            // awareness states are wrapped in a nested length-prefixed buffer
            write_var_buffer(buffer, &{
                let mut update = Vec::new();
                write_awareness(&mut update, awareness)?;
                update
            })?;
        }
        SyncMessage::Doc(doc) => {
            write_sync_tag(buffer, MessageType::Doc)?;
            write_doc_message(buffer, doc)?;
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::{awareness::AwarenessState, *};
    // Tag encoding round-trips for every message type.
    #[test]
    fn test_sync_tag() {
        let messages = [
            MessageType::Auth,
            MessageType::Awareness,
            MessageType::AwarenessQuery,
            MessageType::Doc,
        ];
        for msg in messages {
            let mut buffer = Vec::new();
            write_sync_tag(&mut buffer, msg.clone()).unwrap();
            let (tail, decoded) = read_sync_tag(&buffer).unwrap();
            assert_eq!(tail.len(), 0);
            assert_eq!(decoded, msg);
        }
    }
    // Full message round-trips for every variant.
    #[test]
    fn test_sync_message() {
        let messages = [
            SyncMessage::Auth(Some("reason".to_string())),
            SyncMessage::Awareness(HashMap::from([(1, AwarenessState::new(1, "test".into()))])),
            SyncMessage::AwarenessQuery,
            SyncMessage::Doc(DocMessage::Step1(vec![4, 5, 6])),
            SyncMessage::Doc(DocMessage::Step2(vec![7, 8, 9])),
            SyncMessage::Doc(DocMessage::Update(vec![10, 11, 12])),
        ];
        for msg in messages {
            let mut buffer = Vec::new();
            write_sync_message(&mut buffer, &msg).unwrap();
            let (tail, decoded) = read_sync_message(&buffer).unwrap();
            assert_eq!(tail.len(), 0);
            assert_eq!(decoded, msg);
        }
    }
}

View File

@@ -0,0 +1,32 @@
#[allow(unused)]
#[cfg(not(loom))]
pub(crate) use std::sync::{
atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicU8, Ordering},
Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard,
};
pub use std::sync::{Arc, Weak};
#[cfg(all(test, not(loom)))]
pub(crate) use std::{
sync::{atomic::AtomicUsize, MutexGuard},
thread,
};
#[cfg(loom)]
pub(crate) use loom::{
sync::{
atomic::{AtomicBool, AtomicU16, AtomicU8, AtomicUsize, Ordering},
Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard,
},
thread,
};
/// Run a test body either inside `loom::model` (exhaustive interleaving
/// exploration, when built with `--cfg loom`) or directly (normal builds).
#[macro_export(local_inner_macros)]
macro_rules! loom_model {
    ($test:block) => {
        #[cfg(loom)]
        loom::model(move || $test);
        #[cfg(not(loom))]
        $test
    };
}

View File

@@ -0,0 +1,2 @@
*.node
.coverage

View File

@@ -0,0 +1,20 @@
[package]
authors = ["DarkSky <darksky2048@gmail.com>"]
edition = "2021"
license = "MIT"
name = "y-octo-node"
repository = "https://github.com/toeverything/y-octo"
version = "0.0.1"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
anyhow = { workspace = true }
napi = { workspace = true, features = ["anyhow", "napi4"] }
napi-derive = { workspace = true }
y-octo = { workspace = true, features = ["large_refs"] }
[build-dependencies]
napi-build = { workspace = true }

View File

@@ -0,0 +1,3 @@
fn main() {
napi_build::setup();
}

48
packages/common/y-octo/node/index.d.ts vendored Normal file
View File

@@ -0,0 +1,48 @@
/* auto-generated by NAPI-RS */
/* eslint-disable */
/** A Yjs-compatible CRDT document backed by the native y-octo engine. */
export declare class Doc {
  constructor(clientId?: number | undefined | null)
  get clientId(): number
  get guid(): string
  get keys(): Array<string>
  getOrCreateArray(key: string): YArray
  getOrCreateText(key: string): YText
  getOrCreateMap(key: string): YMap
  createArray(): YArray
  createText(): YText
  createMap(): YMap
  applyUpdate(update: Uint8Array): void
  encodeStateAsUpdateV1(state?: Uint8Array | undefined | null): Uint8Array
  gc(): void
  onUpdate(callback: (result: Uint8Array) => void): void
}
/** Shared sequence type. */
export declare class YArray {
  constructor()
  get length(): number
  get isEmpty(): boolean
  get<T = unknown>(index: number): T
  insert(index: number, value: YArray | YMap | YText | boolean | number | string | Record<string, any> | null | undefined): void
  remove(index: number, len: number): void
  // NOTE(review): `JsArray` is not declared anywhere in this file, so
  // consumers see an unresolved type — verify the napi-rs emit.
  toJson(): JsArray
}
/** Shared key-value map type. */
export declare class YMap {
  constructor()
  get length(): number
  get isEmpty(): boolean
  get<T = unknown>(key: string): T
  set(key: string, value: YArray | YMap | YText | boolean | number | string | Record<string, any> | null | undefined): void
  remove(key: string): void
  toJson(): object
}
/** Shared collaborative text type. */
export declare class YText {
  constructor()
  get len(): number
  get isEmpty(): boolean
  insert(index: number, str: string): void
  remove(index: number, len: number): void
  get length(): number
  toString(): string
}

View File

@@ -0,0 +1,377 @@
// prettier-ignore
/* eslint-disable */
// @ts-nocheck
/* auto-generated by NAPI-RS */
// Rebind `require` so bundlers do not rewrite the dynamic native lookups.
const { createRequire } = require('node:module')
require = createRequire(__filename);
const { readFileSync } = require('node:fs');
// Resolved native binding and the errors collected while probing candidates.
let nativeBinding = null;
const loadErrors = [];
/**
 * Detect whether the current system uses musl libc.
 * Non-Linux platforms always report `false`; on Linux the three probes
 * (filesystem, diagnostic report, child process) are consulted in order
 * until one returns a definite true/false answer.
 */
const isMusl = () => {
  if (process.platform !== 'linux') {
    return false;
  }
  let verdict = isMuslFromFilesystem();
  if (verdict === null) {
    verdict = isMuslFromReport();
  }
  if (verdict === null) {
    verdict = isMuslFromChildProcess();
  }
  return verdict;
};
// A shared-object path is musl-flavoured when it embeds one of these markers.
const isFileMusl = f => ['libc.musl-', 'ld-musl-'].some(marker => f.includes(marker));
// Probe 1: musl distros ship an `ldd` that mentions "musl"; glibc's does not.
// Returns `null` (inconclusive) when /usr/bin/ldd cannot be read.
const isMuslFromFilesystem = () => {
  try {
    const ldd = readFileSync('/usr/bin/ldd', 'utf-8');
    return ldd.includes('musl');
  } catch {
    return null;
  }
};
// Probe 2: inspect the Node.js diagnostic report. A glibc runtime version in
// the header means glibc; a musl-named shared object means musl. Returns
// `null` (inconclusive) when no report is available.
const isMuslFromReport = () => {
  let report = null;
  if (typeof process.report?.getReport === 'function') {
    // Skip the (slow) network section — only libc info is needed here.
    process.report.excludeNetwork = true;
    report = process.report.getReport();
  }
  if (!report) {
    return null;
  }
  if (report.header && report.header.glibcVersionRuntime) {
    return false;
  }
  if (Array.isArray(report.sharedObjects) && report.sharedObjects.some(isFileMusl)) {
    return true;
  }
  return false;
};
// Probe 3 (last resort): run `ldd --version` and grep its output.
const isMuslFromChildProcess = () => {
  try {
    const version = require('child_process').execSync('ldd --version', { encoding: 'utf8' });
    return version.includes('musl');
  } catch (e) {
    // ldd could not be executed at all; assume glibc rather than staying
    // inconclusive, since this is the final probe in the chain.
    return false;
  }
};
/**
 * Attempt to `require` each candidate specifier in order and return the first
 * one that loads. Every failed attempt is recorded in `loadErrors` so the
 * final "failed to load" error can expose all probed locations.
 */
function tryRequireFirst(...specifiers) {
  for (const specifier of specifiers) {
    try {
      return require(specifier);
    } catch (e) {
      loadErrors.push(e);
    }
  }
  return undefined;
}

/**
 * Load the platform-specific native binding.
 *
 * Resolution order per target: the sibling `.node` artifact first, then the
 * published per-target npm package. Returns `undefined` when nothing could be
 * loaded; the reasons accumulate in `loadErrors`.
 */
function requireNative() {
  // An explicit override path wins over all platform detection.
  if (process.env.NAPI_RS_NATIVE_LIBRARY_PATH) {
    // BUG FIX: this branch previously assigned to `nativeBinding` without
    // returning, so the caller's `nativeBinding = requireNative()` clobbered
    // the successfully loaded module with `undefined`.
    return tryRequireFirst(process.env.NAPI_RS_NATIVE_LIBRARY_PATH);
  }
  if (process.platform === 'android') {
    if (process.arch === 'arm64') {
      return tryRequireFirst('./y-octo.android-arm64.node', '@y-octo/node-android-arm64');
    }
    if (process.arch === 'arm') {
      return tryRequireFirst('./y-octo.android-arm-eabi.node', '@y-octo/node-android-arm-eabi');
    }
    loadErrors.push(new Error(`Unsupported architecture on Android ${process.arch}`));
    return undefined;
  }
  if (process.platform === 'win32') {
    if (process.arch === 'x64') {
      return tryRequireFirst('./y-octo.win32-x64-msvc.node', '@y-octo/node-win32-x64-msvc');
    }
    if (process.arch === 'ia32') {
      return tryRequireFirst('./y-octo.win32-ia32-msvc.node', '@y-octo/node-win32-ia32-msvc');
    }
    if (process.arch === 'arm64') {
      return tryRequireFirst('./y-octo.win32-arm64-msvc.node', '@y-octo/node-win32-arm64-msvc');
    }
    loadErrors.push(new Error(`Unsupported architecture on Windows: ${process.arch}`));
    return undefined;
  }
  if (process.platform === 'darwin') {
    // The universal binary is preferred; fall back to arch-specific builds.
    const universal = tryRequireFirst('./y-octo.darwin-universal.node', '@y-octo/node-darwin-universal');
    if (universal) {
      return universal;
    }
    if (process.arch === 'x64') {
      return tryRequireFirst('./y-octo.darwin-x64.node', '@y-octo/node-darwin-x64');
    }
    if (process.arch === 'arm64') {
      return tryRequireFirst('./y-octo.darwin-arm64.node', '@y-octo/node-darwin-arm64');
    }
    loadErrors.push(new Error(`Unsupported architecture on macOS: ${process.arch}`));
    return undefined;
  }
  if (process.platform === 'freebsd') {
    if (process.arch === 'x64') {
      return tryRequireFirst('./y-octo.freebsd-x64.node', '@y-octo/node-freebsd-x64');
    }
    if (process.arch === 'arm64') {
      return tryRequireFirst('./y-octo.freebsd-arm64.node', '@y-octo/node-freebsd-arm64');
    }
    loadErrors.push(new Error(`Unsupported architecture on FreeBSD: ${process.arch}`));
    return undefined;
  }
  if (process.platform === 'linux') {
    // isMusl() is only consulted for arches that actually ship musl builds,
    // so we never spawn the `ldd` probe needlessly.
    if (process.arch === 'x64') {
      return isMusl()
        ? tryRequireFirst('./y-octo.linux-x64-musl.node', '@y-octo/node-linux-x64-musl')
        : tryRequireFirst('./y-octo.linux-x64-gnu.node', '@y-octo/node-linux-x64-gnu');
    }
    if (process.arch === 'arm64') {
      return isMusl()
        ? tryRequireFirst('./y-octo.linux-arm64-musl.node', '@y-octo/node-linux-arm64-musl')
        : tryRequireFirst('./y-octo.linux-arm64-gnu.node', '@y-octo/node-linux-arm64-gnu');
    }
    if (process.arch === 'arm') {
      return isMusl()
        ? tryRequireFirst('./y-octo.linux-arm-musleabihf.node', '@y-octo/node-linux-arm-musleabihf')
        : tryRequireFirst('./y-octo.linux-arm-gnueabihf.node', '@y-octo/node-linux-arm-gnueabihf');
    }
    if (process.arch === 'riscv64') {
      return isMusl()
        ? tryRequireFirst('./y-octo.linux-riscv64-musl.node', '@y-octo/node-linux-riscv64-musl')
        : tryRequireFirst('./y-octo.linux-riscv64-gnu.node', '@y-octo/node-linux-riscv64-gnu');
    }
    if (process.arch === 'ppc64') {
      return tryRequireFirst('./y-octo.linux-ppc64-gnu.node', '@y-octo/node-linux-ppc64-gnu');
    }
    if (process.arch === 's390x') {
      return tryRequireFirst('./y-octo.linux-s390x-gnu.node', '@y-octo/node-linux-s390x-gnu');
    }
    loadErrors.push(new Error(`Unsupported architecture on Linux: ${process.arch}`));
    return undefined;
  }
  loadErrors.push(
    new Error(
      `Unsupported OS: ${process.platform}, architecture: ${process.arch}`
    )
  );
  return undefined;
}
// Resolve the native binding eagerly as a module-load side effect.
nativeBinding = requireNative();
// Fall back to the WASI build when no native binding loaded, or use it
// unconditionally when NAPI_RS_FORCE_WASI is set.
if (!nativeBinding || process.env.NAPI_RS_FORCE_WASI) {
  try {
    nativeBinding = require('./y-octo.wasi.cjs');
  } catch (err) {
    // Only record WASI load failures when WASI was explicitly requested;
    // otherwise the native-binding errors are the interesting ones.
    if (process.env.NAPI_RS_FORCE_WASI) {
      loadErrors.push(err);
    }
  }
  if (!nativeBinding) {
    try {
      nativeBinding = require('@y-octo/node-wasm32-wasi');
    } catch (err) {
      if (process.env.NAPI_RS_FORCE_WASI) {
        loadErrors.push(err);
      }
    }
  }
}
// Nothing loaded at all: fail loudly, attaching every recorded error as the
// cause so users can see each probed location.
if (!nativeBinding) {
  if (loadErrors.length > 0) {
    // TODO Link to documentation with potential fixes
    // - The package owner could build/publish bindings for this arch
    // - The user may need to bundle the correct files
    // - The user may need to re-install node_modules to get new packages
    throw new Error('Failed to load native binding', { cause: loadErrors });
  }
  throw new Error(`Failed to load native binding`);
}
// Re-export the classes implemented by the native addon.
module.exports.Doc = nativeBinding.Doc;
module.exports.YArray = nativeBinding.YArray;
module.exports.YMap = nativeBinding.YMap;
module.exports.YText = nativeBinding.YText;

View File

@@ -0,0 +1,72 @@
{
"name": "@y-octo/node",
"private": true,
"main": "index.js",
"types": "index.d.ts",
"napi": {
"binaryName": "y-octo",
"targets": [
"x86_64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
"aarch64-apple-darwin",
"aarch64-pc-windows-msvc",
"aarch64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
"aarch64-unknown-linux-musl"
],
"ts": {
"constEnum": false
}
},
"license": "MIT",
"devDependencies": {
"@napi-rs/cli": "3.0.0-alpha.77",
"@types/node": "^22.14.1",
"@types/prompts": "^2.4.9",
"c8": "^10.1.3",
"prompts": "^2.4.2",
"ts-node": "^10.9.2",
"typescript": "^5.8.3",
"yjs": "^13.6.24"
},
"engines": {
"node": ">= 10"
},
"scripts": {
"artifacts": "napi artifacts",
"build": "napi build --platform --release --no-const-enum",
"build:debug": "napi build --platform --no-const-enum",
"universal": "napi universal",
"test": "NODE_NO_WARNINGS=1 node ./scripts/run-test.mts all",
"test:watch": "yarn exec ts-node-esm ./scripts/run-test.ts all --watch",
"test:coverage": "NODE_OPTIONS=\"--loader ts-node/esm\" c8 node ./scripts/run-test.mts all",
"version": "napi version"
},
"version": "0.0.1",
"sharedConfig": {
"nodeArgs": [
"--loader",
"ts-node/esm",
"--es-module-specifier-resolution=node"
],
"env": {
"TS_NODE_TRANSPILE_ONLY": "1",
"TS_NODE_PROJECT": "./tsconfig.json",
"NODE_ENV": "development",
"DEBUG": "napi:*"
}
},
"c8": {
"reporter": [
"text",
"lcov"
],
"report-dir": ".coverage",
"exclude": [
"scripts",
"node_modules",
"**/*.spec.ts"
]
}
}

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env ts-node-esm
import { resolve } from 'node:path';
import { spawn } from 'node:child_process';
import { readdir } from 'node:fs/promises';
import * as process from 'node:process';
import { fileURLToPath } from 'node:url';
import prompts from 'prompts';
import pkg from '../package.json' with { type: 'json' };

// Test-runner entry point: `run-test.mts all` executes every file under
// `tests/`; without `all` it interactively asks which file to run.
// Passing `--watch` anywhere in argv re-runs tests on change.

const root = fileURLToPath(new URL('..', import.meta.url));
const testDir = resolve(root, 'tests');
const files = await readdir(testDir);
const watchMode = process.argv.includes('--watch');

// Node argv shared by both modes. The watch flag is spread in conditionally
// so we never hand `spawn` an empty-string argument (the previous ternary
// produced `''` when not watching).
const sharedArgs = [
  ...pkg.sharedConfig.nodeArgs,
  '--test',
  ...(watchMode ? ['--watch'] : []),
];

const env = {
  ...pkg.sharedConfig.env,
  PATH: process.env.PATH,
  NODE_ENV: 'test',
  NODE_NO_WARNINGS: '1',
};

// Spawns `node` with the given args, forwarding its exit code.
const runNode = (args: string[]) => {
  const cp = spawn('node', args, {
    cwd: root,
    env,
    stdio: 'inherit',
    shell: true,
  });
  cp.on('exit', code => {
    process.exit(code ?? 0);
  });
};

if (process.argv[2] === 'all') {
  runNode([...sharedArgs, ...files.map(f => resolve(testDir, f))]);
} else {
  const result = await prompts([
    {
      type: 'select',
      name: 'file',
      message: 'Select a file to run',
      choices: files.map(file => ({
        title: file,
        value: file,
      })),
      initial: 1,
    },
  ]);
  // prompts resolves with an empty object when the user cancels (Ctrl+C);
  // bail out instead of spawning node with an `undefined` path.
  if (!result.file) {
    process.exit(0);
  }
  runNode([
    ...sharedArgs,
    '--test-reporter=spec',
    '--test-reporter-destination=stdout',
    resolve(testDir, result.file),
  ]);
}

View File

@@ -0,0 +1,160 @@
use napi::{bindgen_prelude::Array as JsArray, Env, JsUnknown, ValueType};
use y_octo::{Any, Array, Value};
use super::*;
/// N-API wrapper around a y-octo CRDT array.
#[napi]
pub struct YArray {
    // Underlying y-octo array handle; shared with the owning document.
    pub(crate) array: Array,
}
#[napi]
impl YArray {
    /// Stub for the JS `new YArray()` constructor. Arrays are only created
    /// through `Doc` (`getOrCreateArray` / `createArray`); calling this
    /// directly panics.
    #[allow(clippy::new_without_default)]
    #[napi(constructor)]
    pub fn new() -> Self {
        unimplemented!()
    }

    /// Wraps an existing y-octo `Array` handle obtained from a `Doc`.
    pub(crate) fn inner_new(array: Array) -> Self {
        Self { array }
    }

    /// Number of elements currently in the array.
    #[napi(getter)]
    pub fn length(&self) -> i64 {
        self.array.len() as i64
    }

    /// True when the array holds no elements.
    #[napi(getter)]
    pub fn is_empty(&self) -> bool {
        self.array.is_empty()
    }

    /// Returns the element at `index`. Nested shared types come back as
    /// `YArray` / `YMap` / `YText` wrappers; plain data is converted to a JS
    /// value. Out-of-range indices and unconvertible variants yield JS `null`.
    #[napi(ts_generic_types = "T = unknown", ts_return_type = "T")]
    pub fn get(&self, env: Env, index: i64) -> Result<MixedYType> {
        if let Some(value) = self.array.get(index as u64) {
            match value {
                Value::Any(any) => get_js_unknown_from_any(env, any).map(MixedYType::D),
                Value::Array(array) => Ok(MixedYType::A(YArray::inner_new(array))),
                Value::Map(map) => Ok(MixedYType::B(YMap::inner_new(map))),
                Value::Text(text) => Ok(MixedYType::C(YText::inner_new(text))),
                _ => env.get_null().map(|v| v.into_unknown()).map(MixedYType::D),
            }
            .map_err(anyhow::Error::from)
        } else {
            Ok(MixedYType::D(env.get_null()?.into_unknown()))
        }
    }

    /// Inserts `value` at `index`. Shared types (`YArray`/`YMap`/`YText`) are
    /// inserted as references to the same underlying CRDT; plain JS values are
    /// coerced; BigInt/Symbol/Function/External values are rejected.
    #[napi(
        ts_args_type = "index: number, value: YArray | YMap | YText | boolean | number | string | \
                        Record<string, any> | null | undefined"
    )]
    pub fn insert(&mut self, index: i64, value: MixedRefYType) -> Result<()> {
        match value {
            MixedRefYType::A(array) => self
                .array
                .insert(index as u64, array.array.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::B(map) => self
                .array
                .insert(index as u64, map.map.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::C(text) => self
                .array
                .insert(index as u64, text.text.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::D(unknown) => match unknown.get_type() {
                Ok(value_type) => match value_type {
                    // `undefined` and `null` are both stored as CRDT null.
                    ValueType::Undefined | ValueType::Null => self
                        .array
                        .insert(index as u64, Any::Null)
                        .map_err(anyhow::Error::from),
                    ValueType::Boolean => match unknown.coerce_to_bool().and_then(|v| v.get_value()) {
                        Ok(boolean) => self
                            .array
                            .insert(index as u64, boolean)
                            .map_err(anyhow::Error::from),
                        Err(e) => Err(anyhow::Error::new(e).context("Failed to coerce value to boolean")),
                    },
                    ValueType::Number => match unknown.coerce_to_number().and_then(|v| v.get_double()) {
                        Ok(number) => self
                            .array
                            .insert(index as u64, number)
                            .map_err(anyhow::Error::from),
                        Err(e) => Err(anyhow::Error::new(e).context("Failed to coerce value to number")),
                    },
                    ValueType::String => {
                        match unknown
                            .coerce_to_string()
                            .and_then(|v| v.into_utf8())
                            .and_then(|s| s.as_str().map(|s| s.to_string()))
                        {
                            Ok(string) => self
                                .array
                                .insert(index as u64, string)
                                .map_err(anyhow::Error::from),
                            Err(e) => Err(anyhow::Error::new(e).context("Failed to coerce value to string")),
                        }
                    }
                    // NOTE(review): this arm relies on `get_array_length`, so it
                    // only handles JS *arrays*, whose elements are spliced in one
                    // by one starting at `index`. A plain `Record<string, any>`
                    // (advertised in the TS signature) appears to error here,
                    // unlike `YMap::set` which goes through
                    // `get_any_from_js_object` — confirm whether that asymmetry
                    // is intended. Elements that fail conversion are silently
                    // skipped while `i` still advances.
                    ValueType::Object => match unknown
                        .coerce_to_object()
                        .and_then(|o| o.get_array_length().map(|l| (o, l)))
                    {
                        Ok((object, length)) => {
                            for i in 0..length {
                                if let Ok(any) = object
                                    .get_element::<JsUnknown>(i)
                                    .and_then(get_any_from_js_unknown)
                                {
                                    self
                                        .array
                                        .insert(index as u64 + i as u64, Value::Any(any))
                                        .map_err(anyhow::Error::from)?;
                                }
                            }
                            Ok(())
                        }
                        Err(e) => Err(anyhow::Error::new(e).context("Failed to coerce value to object")),
                    },
                    ValueType::BigInt => Err(anyhow::Error::msg("BigInt values are not supported")),
                    ValueType::Symbol => Err(anyhow::Error::msg("Symbol values are not supported")),
                    ValueType::Function => Err(anyhow::Error::msg("Function values are not supported")),
                    ValueType::External => Err(anyhow::Error::msg("External values are not supported")),
                    ValueType::Unknown => Err(anyhow::Error::msg("Unknown values are not supported")),
                },
                Err(e) => Err(anyhow::Error::from(e)),
            },
        }
    }

    /// Removes `len` elements starting at `index`.
    #[napi]
    pub fn remove(&mut self, index: i64, len: i64) -> Result<()> {
        self
            .array
            .remove(index as u64, len as u64)
            .map_err(anyhow::Error::from)
    }

    /// Serializes the array into a plain JS array.
    #[napi]
    pub fn to_json(&self, env: Env) -> Result<JsArray> {
        let mut js_array = env.create_array(0)?;
        for value in self.array.iter() {
            js_array.insert(get_js_unknown_from_value(env, value)?)?;
        }
        Ok(js_array)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created document must expose an empty root array.
    #[test]
    fn test_array_init() {
        let doc = Doc::new(None);
        let arr = doc.get_or_create_array("array".into()).unwrap();
        assert_eq!(arr.length(), 0);
    }
}

View File

@@ -0,0 +1,176 @@
use napi::{
bindgen_prelude::{Buffer, Uint8Array},
threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode},
};
use y_octo::{CrdtRead, Doc as YDoc, History, RawDecoder, StateVector};
use super::*;
/// N-API wrapper around a y-octo CRDT document.
#[napi]
pub struct Doc {
    // The underlying y-octo document.
    doc: YDoc,
}
#[napi]
impl Doc {
#[napi(constructor)]
pub fn new(client_id: Option<i64>) -> Self {
Self {
doc: if let Some(client_id) = client_id {
YDoc::with_client(client_id as u64)
} else {
YDoc::default()
},
}
}
#[napi(getter)]
pub fn client_id(&self) -> i64 {
self.doc.client() as i64
}
#[napi(getter)]
pub fn guid(&self) -> &str {
self.doc.guid()
}
#[napi(getter)]
pub fn keys(&self) -> Vec<String> {
self.doc.keys()
}
#[napi]
pub fn get_or_create_array(&self, key: String) -> Result<YArray> {
self
.doc
.get_or_create_array(key)
.map(YArray::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn get_or_create_text(&self, key: String) -> Result<YText> {
self
.doc
.get_or_create_text(key)
.map(YText::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn get_or_create_map(&self, key: String) -> Result<YMap> {
self
.doc
.get_or_create_map(key)
.map(YMap::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn create_array(&self) -> Result<YArray> {
self
.doc
.create_array()
.map(YArray::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn create_text(&self) -> Result<YText> {
self
.doc
.create_text()
.map(YText::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn create_map(&self) -> Result<YMap> {
self
.doc
.create_map()
.map(YMap::inner_new)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn apply_update(&mut self, update: &[u8]) -> Result<()> {
self.doc.apply_update_from_binary_v1(update)?;
Ok(())
}
#[napi]
pub fn encode_state_as_update_v1(&self, state: Option<&[u8]>) -> Result<Uint8Array> {
let result = match state {
Some(state) => {
let mut decoder = RawDecoder::new(state);
let state = StateVector::read(&mut decoder)?;
self.doc.encode_state_as_update_v1(&state)
}
None => self.doc.encode_update_v1(),
};
result.map(|v| v.into()).map_err(anyhow::Error::from)
}
#[napi]
pub fn gc(&self) -> Result<()> {
self.doc.gc().map_err(anyhow::Error::from)
}
#[napi(ts_args_type = "callback: (result: Uint8Array) => void")]
pub fn on_update(&mut self, callback: ThreadsafeFunction<Buffer>) -> Result<()> {
let callback = move |update: &[u8], _h: &[History]| {
callback.call(
Ok(update.to_vec().into()),
ThreadsafeFunctionCallMode::Blocking,
);
};
self.doc.subscribe(Box::new(callback));
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An explicitly supplied client id must be reported back verbatim.
    #[test]
    fn test_doc_client() {
        let doc = Doc::new(Some(1));
        assert_eq!(doc.client_id(), 1);
    }

    /// Document guids are nanoid-style, 21 characters long.
    #[test]
    fn test_doc_guid() {
        let doc = Doc::new(None);
        assert_eq!(doc.guid().len(), 21);
    }

    #[test]
    fn test_create_array() {
        let doc = Doc::new(None);
        let arr = doc.get_or_create_array("array".into()).unwrap();
        assert_eq!(arr.length(), 0);
    }

    #[test]
    fn test_create_text() {
        let doc = Doc::new(None);
        let txt = doc.get_or_create_text("text".into()).unwrap();
        assert_eq!(txt.len(), 0);
    }

    /// Root type names are reported through `keys` (order unspecified).
    #[test]
    fn test_keys() {
        let doc = Doc::new(None);
        doc.get_or_create_array("array".into()).unwrap();
        doc.get_or_create_text("text".into()).unwrap();
        doc.get_or_create_map("map".into()).unwrap();
        let mut keys = doc.keys();
        keys.sort();
        assert_eq!(keys, vec!["array", "map", "text"]);
    }
}

View File

@@ -0,0 +1,17 @@
use anyhow::Result;
use napi_derive::napi;
mod array;
mod doc;
mod map;
mod text;
mod utils;
pub use array::YArray;
pub use doc::Doc;
pub use map::YMap;
pub use text::YText;
use utils::{
get_any_from_js_object, get_any_from_js_unknown, get_js_unknown_from_any,
get_js_unknown_from_value, MixedRefYType, MixedYType,
};

View File

@@ -0,0 +1,134 @@
use napi::{Env, JsObject, ValueType};
use y_octo::{Any, Map, Value};
use super::*;
/// N-API wrapper around a y-octo CRDT map.
#[napi]
pub struct YMap {
    // Underlying y-octo map handle; shared with the owning document.
    pub(crate) map: Map,
}
#[napi]
impl YMap {
    /// Stub for the JS `new YMap()` constructor. Maps are only created through
    /// `Doc` (`getOrCreateMap` / `createMap`); calling this directly panics.
    #[allow(clippy::new_without_default)]
    #[napi(constructor)]
    pub fn new() -> Self {
        unimplemented!()
    }

    /// Wraps an existing y-octo `Map` handle obtained from a `Doc`.
    pub(crate) fn inner_new(map: Map) -> Self {
        Self { map }
    }

    /// Number of entries in the map.
    #[napi(getter)]
    pub fn length(&self) -> i64 {
        self.map.len() as i64
    }

    /// True when the map has no entries.
    #[napi(getter)]
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }

    /// Returns the value stored under `key`. Nested shared types come back as
    /// `YArray` / `YMap` / `YText` wrappers; plain data is converted to a JS
    /// value. Missing keys and unconvertible variants yield JS `null`.
    #[napi(ts_generic_types = "T = unknown", ts_return_type = "T")]
    pub fn get(&self, env: Env, key: String) -> Result<MixedYType> {
        if let Some(value) = self.map.get(&key) {
            match value {
                Value::Any(any) => get_js_unknown_from_any(env, any).map(MixedYType::D),
                Value::Array(array) => Ok(MixedYType::A(YArray::inner_new(array))),
                Value::Map(map) => Ok(MixedYType::B(YMap::inner_new(map))),
                Value::Text(text) => Ok(MixedYType::C(YText::inner_new(text))),
                _ => env.get_null().map(|v| v.into_unknown()).map(MixedYType::D),
            }
            .map_err(anyhow::Error::from)
        } else {
            Ok(MixedYType::D(env.get_null()?.into_unknown()))
        }
    }

    /// Stores `value` under `key`. Shared types are inserted as references to
    /// the same underlying CRDT; `undefined`/`null` become CRDT null; plain JS
    /// objects are converted via `get_any_from_js_object`; BigInt, Symbol,
    /// Function and External values are rejected.
    #[napi(
        ts_args_type = "key: string, value: YArray | YMap | YText | boolean | number | string | \
                        Record<string, any> | null | undefined"
    )]
    pub fn set(&mut self, key: String, value: MixedRefYType) -> Result<()> {
        match value {
            MixedRefYType::A(array) => self
                .map
                .insert(key, array.array.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::B(map) => self
                .map
                .insert(key, map.map.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::C(text) => self
                .map
                .insert(key, text.text.clone())
                .map_err(anyhow::Error::from),
            MixedRefYType::D(unknown) => match unknown.get_type() {
                Ok(value_type) => match value_type {
                    ValueType::Undefined | ValueType::Null => {
                        self.map.insert(key, Any::Null).map_err(anyhow::Error::from)
                    }
                    ValueType::Boolean => match unknown.coerce_to_bool().and_then(|v| v.get_value()) {
                        Ok(boolean) => self.map.insert(key, boolean).map_err(anyhow::Error::from),
                        Err(e) => Err(anyhow::Error::from(e).context("Failed to coerce value to boolean")),
                    },
                    ValueType::Number => match unknown.coerce_to_number().and_then(|v| v.get_double()) {
                        Ok(number) => self.map.insert(key, number).map_err(anyhow::Error::from),
                        Err(e) => Err(anyhow::Error::from(e).context("Failed to coerce value to number")),
                    },
                    ValueType::String => {
                        match unknown
                            .coerce_to_string()
                            .and_then(|v| v.into_utf8())
                            .and_then(|s| s.as_str().map(|s| s.to_string()))
                        {
                            Ok(string) => self.map.insert(key, string).map_err(anyhow::Error::from),
                            Err(e) => Err(anyhow::Error::from(e).context("Failed to coerce value to string")),
                        }
                    }
                    ValueType::Object => match unknown.coerce_to_object().and_then(get_any_from_js_object) {
                        Ok(any) => self
                            .map
                            .insert(key, Value::Any(any))
                            .map_err(anyhow::Error::from),
                        Err(e) => Err(anyhow::Error::from(e).context("Failed to coerce value to object")),
                    },
                    ValueType::BigInt => Err(anyhow::Error::msg("BigInt values are not supported")),
                    ValueType::Symbol => Err(anyhow::Error::msg("Symbol values are not supported")),
                    ValueType::Function => Err(anyhow::Error::msg("Function values are not supported")),
                    ValueType::External => Err(anyhow::Error::msg("External values are not supported")),
                    ValueType::Unknown => Err(anyhow::Error::msg("Unknown values are not supported")),
                },
                Err(e) => Err(anyhow::Error::from(e)),
            },
        }
    }

    /// Removes `key` from the map; removing a missing key is a no-op.
    #[napi]
    pub fn remove(&mut self, key: String) {
        self.map.remove(&key);
    }

    /// Serializes the map into a plain JS object.
    #[napi]
    pub fn to_json(&self, env: Env) -> Result<JsObject> {
        let mut js_object = env.create_object()?;
        for (key, value) in self.map.iter() {
            // FIX: propagate conversion failures with `?` instead of handing
            // the raw `Result` to `set` — matches `YArray::to_json`.
            js_object.set(key, get_js_unknown_from_value(env, value)?)?;
        }
        Ok(js_object)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created root map starts out empty.
    #[test]
    fn test_map_init() {
        let doc = Doc::new(None);
        let map = doc.get_or_create_map("map".into()).unwrap();
        assert_eq!(map.length(), 0);
    }
}

View File

@@ -0,0 +1,82 @@
use y_octo::Text;
use super::*;
/// N-API wrapper around a y-octo CRDT text.
#[napi]
pub struct YText {
    // Underlying y-octo text handle; shared with the owning document.
    pub(crate) text: Text,
}
#[napi]
impl YText {
#[allow(clippy::new_without_default)]
#[napi(constructor)]
pub fn new() -> Self {
unimplemented!()
}
pub(crate) fn inner_new(text: Text) -> Self {
Self { text }
}
#[napi(getter)]
pub fn len(&self) -> i64 {
self.text.len() as i64
}
#[napi(getter)]
pub fn is_empty(&self) -> bool {
self.text.is_empty()
}
#[napi]
pub fn insert(&mut self, index: i64, str: String) -> Result<()> {
self
.text
.insert(index as u64, str)
.map_err(anyhow::Error::from)
}
#[napi]
pub fn remove(&mut self, index: i64, len: i64) -> Result<()> {
self
.text
.remove(index as u64, len as u64)
.map_err(anyhow::Error::from)
}
#[napi(getter)]
pub fn length(&self) -> i64 {
self.text.len() as i64
}
#[allow(clippy::inherent_to_string)]
#[napi]
pub fn to_string(&self) -> String {
self.text.to_string()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// New documents expose an empty root text.
    #[test]
    fn test_text_init() {
        let doc = Doc::new(None);
        let txt = doc.get_or_create_text("text".into()).unwrap();
        assert_eq!(txt.len(), 0);
    }

    /// Insert/remove round-trips behave like plain string editing.
    #[test]
    fn test_text_edit() {
        let doc = Doc::new(None);
        let mut txt = doc.get_or_create_text("text".into()).unwrap();
        txt.insert(0, "hello".into()).unwrap();
        assert_eq!(txt.to_string(), "hello");
        txt.insert(5, " world".into()).unwrap();
        assert_eq!(txt.to_string(), "hello world");
        txt.remove(5, 6).unwrap();
        assert_eq!(txt.to_string(), "hello");
    }
}

View File

@@ -0,0 +1,117 @@
use napi::{bindgen_prelude::Either4, Env, Error, JsObject, JsUnknown, Result, Status, ValueType};
use y_octo::{AHashMap, Any, HashMapExt, Value};
use super::*;
pub type MixedYType = Either4<YArray, YMap, YText, JsUnknown>;
pub type MixedRefYType<'a> = Either4<&'a YArray, &'a YMap, &'a YText, JsUnknown>;
/// Converts a y-octo `Any` value into the corresponding JS value.
///
/// `Null` and `Undefined` both map to JS `null`; arrays convert recursively.
/// NOTE(review): the catch-all `_` arm also flattens any unlisted variant
/// (e.g. `Any::Object`) to JS `null` — confirm objects are intentionally
/// handled elsewhere (they surface through `Value`, not `Any`, in `get`).
pub fn get_js_unknown_from_any(env: Env, any: Any) -> Result<JsUnknown> {
    match any {
        Any::Null | Any::Undefined => env.get_null().map(|v| v.into_unknown()),
        Any::True => env.get_boolean(true).map(|v| v.into_unknown()),
        Any::False => env.get_boolean(false).map(|v| v.into_unknown()),
        Any::Integer(number) => env.create_int32(number).map(|v| v.into_unknown()),
        Any::BigInt64(number) => env.create_int64(number).map(|v| v.into_unknown()),
        Any::Float32(number) => env.create_double(number.0 as f64).map(|v| v.into_unknown()),
        Any::Float64(number) => env.create_double(number.0).map(|v| v.into_unknown()),
        Any::String(string) => env.create_string(string.as_str()).map(|v| v.into_unknown()),
        Any::Array(array) => {
            // Recursively convert each element into a pre-sized JS array.
            let mut js_array = env.create_array_with_length(array.len())?;
            for (i, value) in array.into_iter().enumerate() {
                js_array.set_element(i as u32, get_js_unknown_from_any(env, value)?)?;
            }
            Ok(js_array.into_unknown())
        }
        _ => env.get_null().map(|v| v.into_unknown()),
    }
}
/// Converts a y-octo `Value` into a JS value.
///
/// Plain data delegates to `get_js_unknown_from_any`; shared types are wrapped
/// in their binding structs and handed to JS as external objects. Unlisted
/// variants become JS `null`.
#[allow(deprecated)]
// Wait for NAPI-RS External::into_unknown to be stabilized
pub fn get_js_unknown_from_value(env: Env, value: Value) -> Result<JsUnknown> {
    match value {
        Value::Any(any) => get_js_unknown_from_any(env, any),
        Value::Array(array) => env
            .create_external(YArray::inner_new(array), None)
            .map(|o| o.into_unknown()),
        Value::Map(map) => env
            .create_external(YMap::inner_new(map), None)
            .map(|o| o.into_unknown()),
        Value::Text(text) => env
            .create_external(YText::inner_new(text), None)
            .map(|o| o.into_unknown()),
        _ => env.get_null().map(|v| v.into_unknown()),
    }
}
/// Converts a JS object into a y-octo `Any`.
///
/// Objects that report an array length are treated as JS arrays and become
/// `Any::Array`; everything else is walked via its enumerable property names
/// into an `Any::Object` map. Elements/properties that cannot be read are
/// skipped; values that cannot be converted propagate an error.
pub fn get_any_from_js_object(object: JsObject) -> Result<Any> {
    if let Ok(length) = object.get_array_length() {
        let mut array = Vec::with_capacity(length as usize);
        for i in 0..length {
            if let Ok(value) = object.get_element::<JsUnknown>(i) {
                array.push(get_any_from_js_unknown(value)?);
            }
        }
        Ok(Any::Array(array))
    } else {
        let mut map = AHashMap::new();
        let keys = object.get_property_names()?;
        if let Ok(length) = keys.get_array_length() {
            for i in 0..length {
                // Coerce each property name to UTF-8, keeping the JS string
                // handle so it can be reused as the property lookup key.
                if let Ok((obj, key)) = keys.get_element::<JsUnknown>(i).and_then(|o| {
                    o.coerce_to_string().and_then(|obj| {
                        obj
                            .into_utf8()
                            .and_then(|s| s.as_str().map(|s| (obj, s.to_string())))
                    })
                }) {
                    if let Ok(value) = object.get_property::<_, JsUnknown>(obj) {
                        // (removed a leftover `println!` debug statement here)
                        map.insert(key, get_any_from_js_unknown(value)?);
                    }
                }
            }
        }
        Ok(Any::Object(map))
    }
}
pub fn get_any_from_js_unknown(js_unknown: JsUnknown) -> Result<Any> {
match js_unknown.get_type()? {
ValueType::Undefined | ValueType::Null => Ok(Any::Null),
ValueType::Boolean => Ok(
js_unknown
.coerce_to_bool()
.and_then(|v| v.get_value())?
.into(),
),
ValueType::Number => Ok(
js_unknown
.coerce_to_number()
.and_then(|v| v.get_double())
.map(|v| v.into())?,
),
ValueType::String => Ok(
js_unknown
.coerce_to_string()
.and_then(|v| v.into_utf8())
.and_then(|s| s.as_str().map(|s| s.to_string()))?
.into(),
),
ValueType::Object => {
if let Ok(object) = js_unknown.coerce_to_object() {
get_any_from_js_object(object)
} else {
Err(Error::new(
Status::InvalidArg,
"Failed to coerce value to object",
))
}
}
_ => Err(Error::new(
Status::InvalidArg,
"Failed to coerce value to any",
)),
}
}

View File

@@ -0,0 +1,62 @@
import assert, { equal, deepEqual } from 'node:assert';
import { test } from 'node:test';
import { Doc, type YArray } from '../index';

// Behavioral tests for the native YArray binding, run with node:test.
test('array test', { concurrency: false }, async t => {
  let client_id: number;
  let doc: Doc;
  t.beforeEach(async () => {
    // Fresh document with a random client id before every subtest.
    client_id = (Math.random() * 100000) | 0;
    doc = new Doc(client_id);
  });
  t.afterEach(async () => {
    client_id = -1;
    // @ts-expect-error - doc must not null in next range
    doc = null;
  });
  await t.test('array should be created', () => {
    let arr = doc.getOrCreateArray('arr');
    deepEqual(doc.keys, ['arr']);
    equal(arr.length, 0);
  });
  await t.test('array editing', () => {
    // Insert primitives of each supported type, then verify reads and removal.
    let arr = doc.getOrCreateArray('arr');
    arr.insert(0, true);
    arr.insert(1, false);
    arr.insert(2, 1);
    arr.insert(3, 'hello world');
    equal(arr.length, 4);
    equal(arr.get(0), true);
    equal(arr.get(1), false);
    equal(arr.get(2), 1);
    equal(arr.get(3), 'hello world');
    equal(arr.length, 4);
    arr.remove(1, 1);
    equal(arr.length, 3);
    // After removing index 1, 'hello world' shifts from index 3 to 2.
    equal(arr.get(2), 'hello world');
  });
  await t.test('sub array should can edit', () => {
    // A detached array attached under a map stays editable and readable back.
    let map = doc.getOrCreateMap('map');
    let sub = doc.createArray();
    map.set('sub', sub);
    sub.insert(0, true);
    sub.insert(1, false);
    sub.insert(2, 1);
    sub.insert(3, 'hello world');
    equal(sub.length, 4);
    let sub2 = map.get<YArray>('sub');
    assert(sub2);
    equal(sub2.get(0), true);
    equal(sub2.get(1), false);
    equal(sub2.get(2), 1);
    equal(sub2.get(3), 'hello world');
    equal(sub2.length, 4);
  });
});

View File

@@ -0,0 +1,99 @@
import { equal } from 'node:assert';
import { test } from 'node:test';
import { Doc } from '../index';
import * as Y from 'yjs';

// Round-trip compatibility tests between y-octo documents and Yjs documents.
test('doc test', { concurrency: false }, async t => {
  let client_id: number;
  let doc: Doc;
  t.beforeEach(async () => {
    // Fresh document with a random client id before every subtest.
    client_id = (Math.random() * 100000) | 0;
    doc = new Doc(client_id);
  });
  t.afterEach(async () => {
    client_id = -1;
    // @ts-expect-error - doc must not null in next range
    doc = null;
  });
  await t.test('doc id should be set', () => {
    equal(doc.clientId, client_id);
  });
  await t.test('y-octo doc update should be apply', () => {
    // Encode a populated y-octo doc and apply it to a second y-octo doc.
    let array = doc.getOrCreateArray('array');
    let map = doc.getOrCreateMap('map');
    let text = doc.getOrCreateText('text');
    array.insert(0, true);
    array.insert(1, false);
    array.insert(2, 1);
    array.insert(3, 'hello world');
    map.set('a', true);
    map.set('b', false);
    map.set('c', 1);
    map.set('d', 'hello world');
    text.insert(0, 'a');
    text.insert(1, 'b');
    text.insert(2, 'c');
    let doc2 = new Doc(client_id);
    doc2.applyUpdate(doc.encodeStateAsUpdateV1());
    let array2 = doc2.getOrCreateArray('array');
    let map2 = doc2.getOrCreateMap('map');
    let text2 = doc2.getOrCreateText('text');
    equal(doc2.clientId, client_id);
    equal(array2.length, 4);
    equal(array2.get(0), true);
    equal(array2.get(1), false);
    equal(array2.get(2), 1);
    equal(array2.get(3), 'hello world');
    equal(map2.length, 4);
    equal(map2.get('a'), true);
    equal(map2.get('b'), false);
    equal(map2.get('c'), 1);
    equal(map2.get('d'), 'hello world');
    equal(text2.toString(), 'abc');
  });
  await t.test('yjs doc update should be apply', () => {
    // Encode a populated Yjs doc and apply its update to the y-octo doc.
    let doc2 = new Y.Doc();
    let array2 = doc2.getArray('array');
    let map2 = doc2.getMap('map');
    let text2 = doc2.getText('text');
    array2.insert(0, [true]);
    array2.insert(1, [false]);
    array2.insert(2, [1]);
    array2.insert(3, ['hello world']);
    map2.set('a', true);
    map2.set('b', false);
    map2.set('c', 1);
    map2.set('d', 'hello world');
    text2.insert(0, 'a');
    text2.insert(1, 'b');
    text2.insert(2, 'c');
    doc.applyUpdate(Buffer.from(Y.encodeStateAsUpdate(doc2)));
    let array = doc.getOrCreateArray('array');
    let map = doc.getOrCreateMap('map');
    let text = doc.getOrCreateText('text');
    equal(array.length, 4);
    equal(array.get(0), true);
    equal(array.get(1), false);
    equal(array.get(2), 1);
    equal(array.get(3), 'hello world');
    equal(map.length, 4);
    equal(map.get('a'), true);
    equal(map.get('b'), false);
    equal(map.get('c'), 1);
    equal(map.get('d'), 'hello world');
    equal(text.toString(), 'abc');
  });
});

View File

@@ -0,0 +1,152 @@
// Integration tests for YMap: creation, editing, nesting, and binary
// compatibility with the reference yjs implementation in both directions.
import assert, { equal, deepEqual } from 'node:assert';
import { test } from 'node:test';
import * as Y from 'yjs';
import { Doc, type YArray, type YMap, type YText } from '../index';

test('map test', { concurrency: false }, async t => {
  let client_id: number;
  let doc: Doc;
  t.beforeEach(async () => {
    // fresh doc with a random client id for every sub-test
    client_id = (Math.random() * 100000) | 0;
    doc = new Doc(client_id);
  });
  t.afterEach(async () => {
    client_id = -1;
    // @ts-expect-error - doc must not null in next range
    doc = null;
  });

  await t.test('map should be created', () => {
    let map = doc.getOrCreateMap('map');
    deepEqual(doc.keys, ['map']);
    equal(map.length, 0);
  });

  await t.test('map editing', () => {
    let map = doc.getOrCreateMap('map');
    // cover every primitive value kind
    map.set('a', true);
    map.set('b', false);
    map.set('c', 1);
    map.set('d', 'hello world');
    equal(map.length, 4);
    equal(map.get('a'), true);
    equal(map.get('b'), false);
    equal(map.get('c'), 1);
    equal(map.get('d'), 'hello world');
    equal(map.length, 4);
    // removal must only drop the targeted key
    map.remove('b');
    equal(map.length, 3);
    equal(map.get('d'), 'hello world');
  });

  await t.test('map should can be nested', () => {
    let map = doc.getOrCreateMap('map');
    let sub = doc.createMap();
    map.set('sub', sub);
    sub.set('a', true);
    sub.set('b', false);
    sub.set('c', 1);
    sub.set('d', 'hello world');
    equal(sub.length, 4);
    // reading the nested map back must observe the same entries
    let sub2 = map.get<YMap>('sub');
    assert(sub2);
    equal(sub2.get('a'), true);
    equal(sub2.get('b'), false);
    equal(sub2.get('c'), 1);
    equal(sub2.get('d'), 'hello world');
    equal(sub2.length, 4);
  });

  await t.test('y-octo to yjs compatibility test with nested type', () => {
    let map = doc.getOrCreateMap('map');
    let sub_array = doc.createArray();
    let sub_map = doc.createMap();
    let sub_text = doc.createText();
    map.set('array', sub_array);
    map.set('map', sub_map);
    map.set('text', sub_text);
    sub_array.insert(0, true);
    sub_array.insert(1, false);
    sub_array.insert(2, 1);
    sub_array.insert(3, 'hello world');
    sub_map.set('a', true);
    sub_map.set('b', false);
    sub_map.set('c', 1);
    sub_map.set('d', 'hello world');
    sub_text.insert(0, 'a');
    sub_text.insert(1, 'b');
    sub_text.insert(2, 'c');
    // an update encoded by y-octo must be readable by the reference yjs impl
    let doc2 = new Y.Doc();
    Y.applyUpdate(doc2, doc.encodeStateAsUpdateV1());
    let map2 = doc2.getMap<any>('map');
    let sub_array2 = map2.get('array') as Y.Array<any>;
    let sub_map2 = map2.get('map') as Y.Map<any>;
    let sub_text2 = map2.get('text') as Y.Text;
    assert(sub_array2);
    equal(sub_array2.length, 4);
    equal(sub_array2.get(0), true);
    equal(sub_array2.get(1), false);
    equal(sub_array2.get(2), 1);
    equal(sub_array2.get(3), 'hello world');
    assert(sub_map2);
    equal(sub_map2.get('a'), true);
    equal(sub_map2.get('b'), false);
    equal(sub_map2.get('c'), 1);
    equal(sub_map2.get('d'), 'hello world');
    assert(sub_text2);
    equal(sub_text2.toString(), 'abc');
  });

  await t.test('yjs to y-octo compatibility test with nested type', () => {
    let doc2 = new Y.Doc();
    let map2 = doc2.getMap<any>('map');
    let sub_array2 = new Y.Array<any>();
    let sub_map2 = new Y.Map<any>();
    let sub_text2 = new Y.Text();
    map2.set('array', sub_array2);
    map2.set('map', sub_map2);
    map2.set('text', sub_text2);
    sub_array2.insert(0, [true]);
    sub_array2.insert(1, [false]);
    sub_array2.insert(2, [1]);
    sub_array2.insert(3, ['hello world']);
    sub_map2.set('a', true);
    sub_map2.set('b', false);
    sub_map2.set('c', 1);
    sub_map2.set('d', 'hello world');
    sub_text2.insert(0, 'a');
    sub_text2.insert(1, 'b');
    sub_text2.insert(2, 'c');
    // an update encoded by yjs must be readable by y-octo
    doc.applyUpdate(Buffer.from(Y.encodeStateAsUpdate(doc2)));
    let map = doc.getOrCreateMap('map');
    let sub_array = map.get<YArray>('array');
    let sub_map = map.get<YMap>('map');
    let sub_text = map.get<YText>('text');
    assert(sub_array);
    equal(sub_array.length, 4);
    equal(sub_array.get(0), true);
    equal(sub_array.get(1), false);
    equal(sub_array.get(2), 1);
    equal(sub_array.get(3), 'hello world');
    assert(sub_map);
    equal(sub_map.get('a'), true);
    equal(sub_map.get('b'), false);
    equal(sub_map.get('c'), 1);
    equal(sub_map.get('d'), 'hello world');
    assert(sub_text);
    equal(sub_text.toString(), 'abc');
  });
});

View File

@@ -0,0 +1,54 @@
// Integration tests for YText: creation, editing, and nested sub-text.
import assert, { equal, deepEqual } from 'node:assert';
import { test } from 'node:test';
import { Doc, type YText } from '../index';

test('text test', { concurrency: false }, async t => {
  let client_id: number;
  let doc: Doc;
  t.beforeEach(async () => {
    // fresh doc with a random client id for every sub-test
    client_id = (Math.random() * 100000) | 0;
    doc = new Doc(client_id);
  });
  t.afterEach(async () => {
    client_id = -1;
    // @ts-expect-error - doc must not null in next range
    doc = null;
  });

  await t.test('text should be created', () => {
    let text = doc.getOrCreateText('text');
    deepEqual(doc.keys, ['text']);
    equal(text.len, 0);
  });

  await t.test('text editing', () => {
    let text = doc.getOrCreateText('text');
    text.insert(0, 'a');
    text.insert(1, 'b');
    text.insert(2, 'c');
    equal(text.toString(), 'abc');
    // remove head, then the (shifted) tail, then the remainder
    text.remove(0, 1);
    equal(text.toString(), 'bc');
    text.remove(1, 1);
    equal(text.toString(), 'b');
    text.remove(0, 1);
    equal(text.toString(), '');
  });

  await t.test('sub text should can edit', () => {
    let map = doc.getOrCreateMap('map');
    let sub = doc.createText();
    map.set('sub', sub);
    sub.insert(0, 'a');
    sub.insert(1, 'b');
    sub.insert(2, 'c');
    equal(sub.toString(), 'abc');
    // reading the nested text back must observe the same content
    let sub2 = map.get<YText>('sub');
    assert(sub2);
    equal(sub2.toString(), 'abc');
  });
});

View File

@@ -0,0 +1,10 @@
{
"extends": "../../../../tsconfig.node.json",
"compilerOptions": {
"noEmit": false,
"outDir": "lib",
"composite": true
},
"include": ["index.d.ts", "tests/**/*.mts"],
"references": []
}

View File

@@ -0,0 +1,71 @@
[package]
authors = ["x1a0t <405028157@qq.com>", "DarkSky <darksky2048@gmail.com>"]
edition = "2021"
license = "MIT"
name = "y-octo-utils"
version = "0.0.1"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
# extra deps for bench_result_render's column parsing
bench = ["regex"]
default = ["merger"]
# helpers consumed by the fuzz/ sub-crate
fuzz = ["arbitrary", "phf"]
# the doc_merger CLI; also turns on y-octo's large_refs feature
merger = ["clap", "y-octo/large_refs"]

[dependencies]
# optional deps are only pulled in by the matching feature above
arbitrary = { workspace = true, features = ["derive"], optional = true }
clap = { workspace = true, features = ["derive"], optional = true }
lib0 = { workspace = true, features = ["lib0-serde"] }
phf = { workspace = true, features = ["macros"], optional = true }
rand = { workspace = true }
rand_chacha = { workspace = true }
regex = { workspace = true, optional = true }
y-octo = { workspace = true }
y-sync = { workspace = true }
yrs = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
path-ext = { workspace = true }
proptest = { workspace = true }
proptest-derive = { workspace = true }

[[bin]]
name = "bench_result_render"
path = "bin/bench_result_render.rs"

[[bin]]
name = "doc_merger"
path = "bin/doc_merger.rs"

[[bin]]
name = "memory_leak_test"
path = "bin/memory_leak_test.rs"

# benches use criterion's harness instead of libtest
[[bench]]
harness = false
name = "array_ops_benchmarks"

[[bench]]
harness = false
name = "codec_benchmarks"

[[bench]]
harness = false
name = "map_ops_benchmarks"

[[bench]]
harness = false
name = "text_ops_benchmarks"

[[bench]]
harness = false
name = "apply_benchmarks"

[[bench]]
harness = false
name = "update_benchmarks"

[lib]
bench = true

View File

@@ -0,0 +1,35 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

/// Benchmark applying pre-recorded binary update fixtures to a fresh yrs doc.
fn apply(c: &mut Criterion) {
    let files = Files::load();
    let mut group = c.benchmark_group("apply");
    // large fixtures take long per iteration; give criterion extra time
    group.measurement_time(Duration::from_secs(15));
    for file in &files.files {
        // report throughput in bytes of the raw update payload
        group.throughput(Throughput::Bytes(file.content.len() as u64));
        group.bench_with_input(
            BenchmarkId::new("apply with yrs", file.path.name_str()),
            &file.content,
            |b, content| {
                b.iter(|| {
                    use yrs::{updates::decoder::Decode, Doc, Transact, Update};
                    // decode + doc creation + apply are all inside the timed
                    // loop: the measurement covers a full "receive update" path
                    let update = Update::decode_v1(content).unwrap();
                    let doc = Doc::new();
                    doc.transact_mut().apply_update(update).unwrap();
                });
            },
        );
    }
    group.finish();
}

criterion_group!(benches, apply);
criterion_main!(benches);

View File

@@ -0,0 +1,79 @@
use std::time::Duration;

use criterion::{criterion_group, criterion_main, Criterion};
use rand::{Rng, SeedableRng};

/// Micro-benchmarks for yrs array operations: single-element insert,
/// range insert, and element removal.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/array");
    group.measurement_time(Duration::from_secs(15));
    group.bench_function("yrs/insert", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        // fixed seed so every run inserts at the same positions
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let idxs = (0..99)
            .map(|_| rng.random_range(0..base_text.len() as u32))
            .collect::<Vec<_>>();
        b.iter(|| {
            use yrs::{Array, Doc, Transact};
            let doc = Doc::new();
            let array = doc.get_or_insert_array("test");
            let mut trx = doc.transact_mut();
            for c in base_text.chars() {
                array.push_back(&mut trx, c.to_string());
            }
            for idx in &idxs {
                array.insert(&mut trx, *idx, "test");
            }
            drop(trx);
        });
    });
    group.bench_function("yrs/insert range", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let idxs = (0..99)
            .map(|_| rng.random_range(0..base_text.len() as u32))
            .collect::<Vec<_>>();
        b.iter(|| {
            use yrs::{Array, Doc, Transact};
            let doc = Doc::new();
            let array = doc.get_or_insert_array("test");
            let mut trx = doc.transact_mut();
            for c in base_text.chars() {
                array.push_back(&mut trx, c.to_string());
            }
            for idx in &idxs {
                array.insert_range(&mut trx, *idx, vec!["test1", "test2"]);
            }
            drop(trx);
        });
    });
    group.bench_function("yrs/remove", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        b.iter(|| {
            use yrs::{Array, Doc, Transact};
            let doc = Doc::new();
            let array = doc.get_or_insert_array("test");
            let mut trx = doc.transact_mut();
            for c in base_text.chars() {
                array.push_back(&mut trx, c.to_string());
            }
            // remove every element from the back to the front.
            // NOTE: the previous `(base_text.len() as u32)..0` range was
            // empty (start >= end), so the removal loop never executed and
            // this bench only measured the push_back setup.
            for idx in (0..base_text.len() as u32).rev() {
                array.remove(&mut trx, idx);
            }
            drop(trx);
        });
    });
    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,89 @@
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
use lib0::{
    decoding::{Cursor, Read},
    encoding::Write,
};

// number of varints encoded/decoded per benchmark iteration
const BENCHMARK_SIZE: u32 = 100000;

/// Baseline benchmarks for lib0's variable-length integer codec:
/// signed 64-bit plus unsigned 32/64-bit, encode and decode paths.
fn codec(c: &mut Criterion) {
    let mut codec_group = c.benchmark_group("codec");
    // flat sampling suits these long, uniform-cost loops
    codec_group.sampling_mode(SamplingMode::Flat);
    {
        codec_group.bench_function("lib0 encode var_int (64 bit)", |b| {
            b.iter(|| {
                // worst case: 8 bytes per value
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..(BENCHMARK_SIZE as i64) {
                    encoder.write_var(i);
                }
            })
        });
        codec_group.bench_function("lib0 decode var_int (64 bit)", |b| {
            // encode once outside the timed loop; only decoding is measured
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..(BENCHMARK_SIZE as i64) {
                encoder.write_var(i);
            }
            b.iter(|| {
                let mut decoder = Cursor::from(&encoder);
                for i in 0..(BENCHMARK_SIZE as i64) {
                    let num: i64 = decoder.read_var().unwrap();
                    assert_eq!(num, i);
                }
            })
        });
    }
    {
        codec_group.bench_function("lib0 encode var_uint (32 bit)", |b| {
            b.iter(|| {
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..BENCHMARK_SIZE {
                    encoder.write_var(i);
                }
            })
        });
        codec_group.bench_function("lib0 decode var_uint (32 bit)", |b| {
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..BENCHMARK_SIZE {
                encoder.write_var(i);
            }
            b.iter(|| {
                let mut decoder = Cursor::from(&encoder);
                for i in 0..BENCHMARK_SIZE {
                    let num: u32 = decoder.read_var().unwrap();
                    assert_eq!(num, i);
                }
            })
        });
    }
    {
        codec_group.bench_function("lib0 encode var_uint (64 bit)", |b| {
            b.iter(|| {
                let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
                for i in 0..(BENCHMARK_SIZE as u64) {
                    encoder.write_var(i);
                }
            })
        });
        codec_group.bench_function("lib0 decode var_uint (64 bit)", |b| {
            let mut encoder = Vec::with_capacity(BENCHMARK_SIZE as usize * 8);
            for i in 0..(BENCHMARK_SIZE as u64) {
                encoder.write_var(i);
            }
            b.iter(|| {
                let mut decoder = Cursor::from(&encoder);
                for i in 0..(BENCHMARK_SIZE as u64) {
                    let num: u64 = decoder.read_var().unwrap();
                    assert_eq!(num, i);
                }
            })
        });
    }
}

criterion_group!(benches, codec);
criterion_main!(benches);

View File

@@ -0,0 +1,79 @@
use std::time::Duration;

use criterion::{criterion_group, criterion_main, Criterion};

/// Micro-benchmarks for yrs map operations: insert, get, and remove.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/map");
    group.measurement_time(Duration::from_secs(15));
    group.bench_function("yrs/insert", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        b.iter(|| {
            use yrs::{Doc, Map, Transact};
            let doc = Doc::new();
            let map = doc.get_or_insert_map("test");
            let mut trx = doc.transact_mut();
            for (idx, key) in base_text.iter().enumerate() {
                map.insert(&mut trx, key.to_string(), idx as f64);
            }
            drop(trx);
        });
    });
    group.bench_function("yrs/get", |b| {
        use yrs::{Doc, Map, Transact};
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        // build the map once outside the timed loop; only reads are measured
        let doc = Doc::new();
        let map = doc.get_or_insert_map("test");
        let mut trx = doc.transact_mut();
        for (idx, key) in base_text.iter().enumerate() {
            map.insert(&mut trx, key.to_string(), idx as f64);
        }
        drop(trx);
        b.iter(|| {
            let trx = doc.transact();
            for key in &base_text {
                map.get(&trx, key).unwrap();
            }
        });
    });
    group.bench_function("yrs/remove", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
            .split(' ')
            .collect::<Vec<_>>();
        b.iter(|| {
            use yrs::{Doc, Map, Transact};
            let doc = Doc::new();
            let map = doc.get_or_insert_map("test");
            let mut trx = doc.transact_mut();
            // insertion is part of the timed loop here: each iteration
            // needs a freshly populated map to remove from
            for (idx, key) in base_text.iter().enumerate() {
                map.insert(&mut trx, key.to_string(), idx as f64);
            }
            for key in &base_text {
                map.remove(&mut trx, key).unwrap();
            }
            drop(trx);
        });
    });
    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,54 @@
use std::time::Duration;

use criterion::{criterion_group, criterion_main, Criterion};
use rand::{Rng, SeedableRng};

/// Micro-benchmarks for yrs text operations: random-position insert and
/// back-to-front removal.
fn operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("ops/text");
    group.measurement_time(Duration::from_secs(15));
    group.bench_function("yrs/insert", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        // fixed seed so every run inserts at the same positions
        let mut rng = rand_chacha::ChaCha20Rng::seed_from_u64(1234);
        let idxs = (0..99)
            .map(|_| rng.random_range(0..base_text.len() as u32))
            .collect::<Vec<_>>();
        b.iter(|| {
            use yrs::{Doc, Text, Transact};
            let doc = Doc::new();
            let text = doc.get_or_insert_text("test");
            let mut trx = doc.transact_mut();
            text.push(&mut trx, base_text);
            for idx in &idxs {
                text.insert(&mut trx, *idx, "test");
            }
            drop(trx);
        });
    });
    group.bench_function("yrs/remove", |b| {
        let base_text = "test1 test2 test3 test4 test5 test6 test7 test8 test9";
        b.iter(|| {
            use yrs::{Doc, Text, Transact};
            let doc = Doc::new();
            let text = doc.get_or_insert_text("test");
            let mut trx = doc.transact_mut();
            // three copies, so removing base_text.len() chars stays in bounds
            text.push(&mut trx, base_text);
            text.push(&mut trx, base_text);
            text.push(&mut trx, base_text);
            // remove one char at a time, back to front.
            // NOTE: the previous `(base_text.len() as u32)..0` range was
            // empty (start >= end), so no removal was ever measured.
            for idx in (0..base_text.len() as u32).rev() {
                text.remove_range(&mut trx, idx, 1);
            }
            drop(trx);
        });
    });
    group.finish();
}

criterion_group!(benches, operations);
criterion_main!(benches);

View File

@@ -0,0 +1,33 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

/// Benchmark decoding (parsing only, no apply) of recorded binary updates.
fn update(c: &mut Criterion) {
    let files = Files::load();
    let mut group = c.benchmark_group("update");
    group.measurement_time(Duration::from_secs(15));
    for file in &files.files {
        // report throughput in bytes of the raw update payload
        group.throughput(Throughput::Bytes(file.content.len() as u64));
        group.bench_with_input(
            BenchmarkId::new("parse with yrs", file.path.name_str()),
            &file.content,
            |b, content| {
                b.iter(|| {
                    use yrs::{updates::decoder::Decode, Update};
                    // returning the decoded Update keeps it from being
                    // optimized away
                    Update::decode_v1(content).unwrap()
                });
            },
        );
    }
    group.finish();
}

criterion_group!(benches, update);
criterion_main!(benches);

View File

@@ -0,0 +1,42 @@
use std::{
    fs::{read, read_dir},
    path::{Path, PathBuf},
};

use path_ext::PathExt;

/// A benchmark fixture: the file's path plus its raw bytes.
pub struct File {
    pub path: PathBuf,
    pub content: Vec<u8>,
}

// fixtures live in the core crate, relative to this crate's manifest dir
const BASE: &str = "../y-octo/src/fixtures/";

impl File {
    fn new(path: &Path) -> Self {
        // benches may panic on a missing/unreadable fixture
        let content = read(path).unwrap();
        Self {
            path: path.into(),
            content,
        }
    }
}

/// All `.bin` fixture files found under [`BASE`].
pub struct Files {
    pub files: Vec<File>,
}

impl Files {
    pub fn load() -> Self {
        let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(BASE);
        let files = read_dir(path).unwrap();
        let files = files
            .flatten()
            // only regular files with a `.bin` extension are fixtures
            .filter(|f| f.path().is_file() && f.path().ext_str() == "bin")
            .map(|f| File::new(&f.path()))
            .collect::<Vec<_>>();
        Self { files }
    }
}

View File

@@ -0,0 +1,3 @@
//! Shared fixture-loading helpers for the benchmark binaries.

mod files;
pub use files::Files;

View File

@@ -0,0 +1,134 @@
use std::{
collections::HashMap,
io::{self, BufRead},
};
/// Parse a criterion/critcmp-style duration cell like `"12.3±0.5ms"` into
/// `(duration_secs, error_secs)`.
///
/// Returns `None` when the cell is not exactly of the `value±error` form or
/// when either number fails to parse. Unknown unit suffixes fall back to a
/// factor of 1.0 (treated as seconds), matching `convert_dur_to_seconds`.
fn process_duration(duration: &str) -> Option<(f64, f64)> {
    let dur_split: Vec<&str> = duration.split('±').collect();
    if dur_split.len() != 2 {
        return None;
    }
    // The error part carries the unit suffix, e.g. "0.5ms" -> "0.5" + "ms".
    // The numeric prefix must include '.' so fractional errors parse; the
    // previous digit-only scan turned "0.5ms" into error 0 with unit ".5ms".
    let err_part = dur_split[1];
    let num_end = err_part
        .char_indices()
        .find(|(_, c)| !c.is_ascii_digit() && *c != '.')
        .map(|(i, _)| i)
        .unwrap_or(err_part.len());
    let (err_num, units) = err_part.split_at(num_end);
    let dur_secs = dur_split[0].parse::<f64>().ok()?;
    let error_secs = err_num.parse::<f64>().ok()?;
    Some((
        convert_dur_to_seconds(dur_secs, units),
        convert_dur_to_seconds(error_secs, units),
    ))
}

/// Convert `dur` in the given unit (`"s"`, `"ms"`, `"µs"`, `"ns"`) to seconds.
/// Unknown units are passed through unchanged (factor 1.0).
fn convert_dur_to_seconds(dur: f64, units: &str) -> f64 {
    // a `match` avoids rebuilding a HashMap on every call; the fallback
    // branch reproduces the original `unwrap_or(&1.0)` behaviour
    let factor = match units {
        "s" => 1.0,
        "ms" => 1.0 / 1000.0,
        "µs" => 1.0 / 1_000_000.0,
        "ns" => 1.0 / 1_000_000_000.0,
        _ => 1.0,
    };
    dur * factor
}
/// A change counts as significant when the faster measurement plus its error
/// still undercuts the slower one, or the slower minus its error still
/// exceeds the faster one — i.e. the error bars leave a gap on either side.
fn is_significant(changes_dur: f64, changes_err: f64, base_dur: f64, base_err: f64) -> bool {
    // order the two measurements so the same comparison covers both cases
    let (fast, fast_err, slow, slow_err) = if changes_dur < base_dur {
        (changes_dur, changes_err, base_dur, base_err)
    } else {
        (base_dur, base_err, changes_dur, changes_err)
    };
    fast + fast_err < slow || slow - slow_err > fast
}
/// Convert critcmp-style comparison rows on stdin into markdown table rows.
///
/// Skips the two header lines, extracts the benchmark name plus the
/// base/changes duration cells from each row, and emits one markdown row
/// per benchmark; significant differences are rendered bold.
fn convert_to_markdown() -> impl Iterator<Item = String> {
    // column splitting needs the regex crate, which is only available with
    // the `bench` feature; without it every row produces no columns
    #[cfg(feature = "bench")]
    let re = regex::Regex::new(r"\s{2,}").unwrap();
    io::stdin()
        .lock()
        .lines()
        .skip(2)
        .flat_map(move |row| {
            if let Ok(_row) = row {
                let columns = {
                    #[cfg(feature = "bench")]
                    {
                        re.split(&_row).collect::<Vec<_>>()
                    }
                    #[cfg(not(feature = "bench"))]
                    Vec::<&str>::new()
                };
                // column layout: 0 = name, 2 = base duration, 5 = PR duration
                let name = columns.first()?;
                let base_duration = columns.get(2)?;
                let changes_duration = columns.get(5)?;
                Some((
                    name.to_string(),
                    base_duration.to_string(),
                    changes_duration.to_string(),
                ))
            } else {
                None
            }
        })
        .flat_map(|(name, base_duration, changes_duration)| {
            let mut difference = "N/A".to_string();
            // "?" marks a benchmark that did not run on one side
            let base_undefined = base_duration == "?";
            let changes_undefined = changes_duration == "?";
            if !base_undefined && !changes_undefined {
                let (base_dur_secs, base_err_secs) = process_duration(&base_duration)?;
                let (changes_dur_secs, changes_err_secs) = process_duration(&changes_duration)?;
                // negative = faster than base, positive = slower
                let diff = -(1.0 - changes_dur_secs / base_dur_secs) * 100.0;
                difference = format!("{:+.2}%", diff);
                if is_significant(
                    changes_dur_secs,
                    changes_err_secs,
                    base_dur_secs,
                    base_err_secs,
                ) {
                    // bold significant changes in the rendered table
                    difference = format!("**{}**", difference);
                }
            }
            Some(format!(
                "| {} | {} | {} | {} |",
                // escape pipes so benchmark names can't break the table
                name.replace('|', "\\|"),
                if base_undefined {
                    "N/A"
                } else {
                    &base_duration
                },
                if changes_undefined {
                    "N/A"
                } else {
                    &changes_duration
                },
                difference
            ))
        })
}
/// Render benchmark comparison rows from stdin as a collapsible markdown
/// table, titled with the platform given as the first CLI argument.
fn main() {
    let platform = std::env::args().nth(1).expect("Missing platform argument");
    let headers = vec![
        format!("## Benchmark for {}", platform),
        "<details>".to_string(),
        " <summary>Click to view benchmark</summary>".to_string(),
        "".to_string(),
        "| Test | Base | PR | % |".to_string(),
        "| --- | --- | --- | --- |".to_string(),
    ];
    // header first, then one row per parsed benchmark line
    for line in headers.into_iter().chain(convert_to_markdown()) {
        println!("{}", line);
    }
    println!("</details>");
}

View File

@@ -0,0 +1,100 @@
use std::{
fs::read,
io::{Error, ErrorKind},
path::PathBuf,
time::Instant,
};
use clap::Parser;
use y_octo::Doc;
/// ybinary merger
// NOTE: clap derives the CLI from this struct; the `///` doc comments above
// the struct and fields become the `--help` text, so they are left as-is.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path of the ybinary to read
    #[arg(short, long)]
    path: String,
}
/// Read ybinary update payloads from `path`.
///
/// A single file yields exactly one payload; a directory yields one payload
/// per contained regular file, in lexicographic path order. Anything else
/// is reported as `NotFound`.
fn load_path(path: &str) -> Result<Vec<Vec<u8>>, Error> {
    let path = PathBuf::from(path);
    if path.is_file() {
        return Ok(vec![read(path)?]);
    }
    if !path.is_dir() {
        return Err(Error::new(ErrorKind::NotFound, "not a file or directory"));
    }
    // collect regular-file entries first so they can be sorted before reading
    let mut entries: Vec<PathBuf> = path
        .read_dir()?
        .filter_map(|entry| entry.ok().map(|e| e.path()))
        .filter(|p| p.is_file())
        .collect();
    entries.sort();
    let mut updates = Vec::with_capacity(entries.len());
    for file in entries {
        println!("read {:?}", file);
        updates.push(read(file)?);
    }
    Ok(updates)
}
/// Entry point: merge all updates found at `--path` and report sizes.
fn main() {
    let args = Args::parse();
    jwst_merge(&args.path);
}
/// Merge every update under `path` into one doc, print parsed history and
/// re-encoded sizes. Interactive debugging utility — panics on any failure.
fn jwst_merge(path: &str) {
    let updates = load_path(path).unwrap();
    let mut doc = Doc::default();
    for (i, update) in updates.iter().enumerate() {
        println!("apply update{i} {} bytes", update.len());
        doc.apply_update_from_binary_v1(update.clone()).unwrap();
    }
    // pause so an operator can inspect the process (e.g. attach a profiler)
    println!("press enter to continue");
    std::io::stdin().read_line(&mut String::new()).unwrap();
    let ts = Instant::now();
    let history = doc.history().parse_store(Default::default());
    println!("history: {:?}", ts.elapsed());
    // print a sample of the history, not all of it
    for history in history.iter().take(100) {
        println!("history: {:?}", history);
    }
    doc.gc().unwrap();
    let binary = {
        let binary = doc.encode_update_v1().unwrap();
        println!("merged {} bytes", binary.len());
        binary
    };
    {
        // round-trip the merged update to verify it can be re-applied
        let mut doc = Doc::default();
        doc.apply_update_from_binary_v1(binary.clone()).unwrap();
        let new_binary = doc.encode_update_v1().unwrap();
        println!("re-encoded {} bytes", new_binary.len(),);
    };
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[ignore = "only for debug"]
    fn test_gc() {
        // hard-coded local path; run manually when debugging GC behaviour
        jwst_merge("/Users/ds/Downloads/out");
    }
}

View File

@@ -0,0 +1,79 @@
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha20Rng;
use y_octo::*;
/// Exercise seeded random text insert/remove and verify the externally
/// tracked length matches the document's own accounting.
fn run_text_test(seed: u64) {
    let doc = Doc::with_client(1);
    let mut rand = ChaCha20Rng::seed_from_u64(seed);
    let mut text = doc.get_or_create_text("test").unwrap();
    text.insert(0, "This is a string with length 32.").unwrap();
    let iteration = 20;
    // `len` mirrors the expected text length through all edits below
    let mut len = 32;
    for i in 0..iteration {
        // NOTE(review): this clone shadows the outer handle each round —
        // presumably intentional, to edit through cloned handles; confirm
        let mut text = text.clone();
        let ins = i % 2 == 0;
        let pos = rand.random_range(0..if ins { text.len() } else { len / 2 });
        if ins {
            let str = format!("hello {i}");
            text.insert(pos, &str).unwrap();
            len += str.len() as u64;
        } else {
            text.remove(pos, 6).unwrap();
            len -= 6;
        }
    }
    assert_eq!(text.to_string().len(), len as usize);
    assert_eq!(text.len(), len);
}
/// Exercise seeded random array insert/remove and verify the externally
/// tracked length matches the document's own accounting.
fn run_array_test(seed: u64) {
    let doc = Doc::with_client(1);
    let mut rand = ChaCha20Rng::seed_from_u64(seed);
    let mut array = doc.get_or_create_array("test").unwrap();
    array.push(1).unwrap();
    let iteration = 20;
    // `len` mirrors the expected array length through all edits below
    let mut len = 1;
    for i in 0..iteration {
        // NOTE(review): this clone shadows the outer handle each round —
        // presumably intentional, to edit through cloned handles; confirm
        let mut array = array.clone();
        let ins = i % 2 == 0;
        let pos = rand.random_range(0..if ins { array.len() } else { len / 2 });
        if ins {
            array.insert(pos, 1).unwrap();
            len += 1;
        } else {
            array.remove(pos, 1).unwrap();
            len -= 1;
        }
    }
    assert_eq!(array.len(), len);
}
/// Build a fresh document and populate its map 10000 times over; repeated
/// allocation like this is what surfaces leaks for this binary.
fn run_map_test() {
    let keys = "test1 test2 test3 test4 test5 test6 test7 test8 test9"
        .split(' ')
        .collect::<Vec<_>>();
    for _ in 0..10000 {
        let doc = Doc::default();
        let mut map = doc.get_or_create_map("test").unwrap();
        for (idx, key) in keys.iter().enumerate() {
            map.insert(key.to_string(), idx).unwrap();
        }
    }
}
/// Run 10000 rounds of the array/text/map scenarios with random seeds.
/// No output on success — presumably meant to be observed under a leak
/// checker (binary is named memory_leak_test); panics signal logic bugs.
fn main() {
    // seed a deterministic generator from one draw of the system RNG
    let mut rand = ChaCha20Rng::seed_from_u64(rand::rng().random());
    for _ in 0..10000 {
        let seed = rand.random();
        run_array_test(seed);
        run_text_test(seed);
        run_map_test();
    }
}

View File

@@ -0,0 +1,4 @@
target
corpus
artifacts
coverage

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,88 @@
[package]
edition = "2021"
name = "y-octo-fuzz"
publish = false
version = "0.0.0"

[package.metadata]
cargo-fuzz = true

[dependencies]
# lib0/yrs are pinned so fuzz reproductions stay stable across builds
lib0 = "=0.16.10"
libfuzzer-sys = "0.4"
rand = "0.9"
rand_chacha = "0.9"
yrs = "=0.23.0"
y-octo-utils = { path = "..", features = ["fuzz"] }

[dependencies.y-octo]
path = "../../core"

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

[profile.release]
# keep line info in release builds for usable fuzz crash backtraces
debug = 1

# one [[bin]] per fuzz target; doc/test are disabled because cargo-fuzz
# drives these binaries directly
[[bin]]
doc = false
name = "codec_doc_any_struct"
path = "fuzz_targets/codec_doc_any_struct.rs"
test = false

[[bin]]
doc = false
name = "codec_doc_any"
path = "fuzz_targets/codec_doc_any.rs"
test = false

[[bin]]
doc = false
name = "decode_bytes"
path = "fuzz_targets/decode_bytes.rs"
test = false

[[bin]]
doc = false
name = "ins_del_text"
path = "fuzz_targets/ins_del_text.rs"
test = false

[[bin]]
doc = false
name = "sync_message"
path = "fuzz_targets/sync_message.rs"
test = false

[[bin]]
doc = false
name = "i32_decode"
path = "fuzz_targets/i32_decode.rs"
test = false

[[bin]]
doc = false
name = "i32_encode"
path = "fuzz_targets/i32_encode.rs"
test = false

[[bin]]
doc = false
name = "u64_decode"
path = "fuzz_targets/u64_decode.rs"
test = false

[[bin]]
doc = false
name = "u64_encode"
path = "fuzz_targets/u64_encode.rs"
test = false

[[bin]]
doc = false
name = "apply_update"
path = "fuzz_targets/apply_update.rs"
test = false

View File

@@ -0,0 +1,51 @@
#![no_main]

use std::collections::HashSet;

use libfuzzer_sys::fuzz_target;
use y_octo_utils::{
    gen_nest_type_from_nest_type, gen_nest_type_from_root, CRDTParam, ManipulateSource, OpType,
    OpsRegistry, YrsNestType,
};
use yrs::Transact;

// Fuzz round-trip: build a yrs doc from arbitrary CRDT operations, encode it,
// then assert y-octo decodes and re-encodes that update byte-identically.
fuzz_target!(|crdt_params: Vec<CRDTParam>| {
    let mut doc = yrs::Doc::new();
    let mut cur_crdt_nest_type: Option<YrsNestType> = None;
    let ops_registry = OpsRegistry::new();
    // each key is used at most once per run; duplicates are skipped
    let mut key_set = HashSet::<String>::new();
    for crdt_param in crdt_params {
        if key_set.contains(&crdt_param.key) {
            continue;
        }
        key_set.insert(crdt_param.key.clone());
        match crdt_param.op_type {
            OpType::HandleCurrent => {
                // mutate the currently selected nested type, if any
                if cur_crdt_nest_type.is_some() {
                    ops_registry.operate_yrs_nest_type(&doc, cur_crdt_nest_type.clone().unwrap(), crdt_param);
                }
            }
            OpType::CreateCRDTNestType => {
                // select a new "current" nested type, either at the doc root
                // or nested inside the current one
                cur_crdt_nest_type = match cur_crdt_nest_type {
                    None => gen_nest_type_from_root(&mut doc, &crdt_param),
                    Some(mut nest_type) => match crdt_param.manipulate_source {
                        ManipulateSource::CurrentNestType => Some(nest_type),
                        ManipulateSource::NewNestTypeFromYDocRoot => {
                            gen_nest_type_from_root(&mut doc, &crdt_param)
                        }
                        ManipulateSource::NewNestTypeFromCurrent => {
                            gen_nest_type_from_nest_type(&mut doc, crdt_param.clone(), &mut nest_type)
                        }
                    },
                };
            }
        };
    }
    // encode with yrs, then require y-octo to reproduce the exact bytes
    let trx = doc.transact_mut();
    let binary_from_yrs = trx.encode_update_v1();
    let doc = y_octo::Doc::try_from_binary_v1(&binary_from_yrs).unwrap();
    let binary = doc.encode_update_v1().unwrap();
    assert_eq!(binary, binary_from_yrs);
});

View File

@@ -0,0 +1,17 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use y_octo::{Any, CrdtRead, CrdtWrite, RawDecoder, RawEncoder};
fuzz_target!(|data: &[u8]| {
if let Ok(any) = Any::read(&mut RawDecoder::new(data)) {
// ensure decoding and re-encoding results has same result
let mut buffer = RawEncoder::default();
if let Err(e) = any.write(&mut buffer) {
panic!("Failed to write message: {:?}, {:?}", any, e);
}
if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
assert_eq!(any, any2);
}
}
});

View File

@@ -0,0 +1,43 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use rand::{distr::Alphanumeric, Rng};
use y_octo::{Any, CrdtRead, CrdtWrite, RawDecoder, RawEncoder};
/// Produce a 7-character random alphanumeric string, used as object keys.
fn get_random_string() -> String {
    let mut rng = rand::rng();
    (0..7)
        .map(|_| char::from(rng.sample(Alphanumeric)))
        .collect()
}
// Fuzz `Any` container codecs: wrap the arbitrary values in an Object (with
// random keys) and in an Array, then verify each write/read round-trips.
fuzz_target!(|data: Vec<Any>| {
    {
        let any = Any::Object(
            data
                .iter()
                .map(|a| (get_random_string(), a.clone()))
                .collect(),
        );
        let mut buffer = RawEncoder::default();
        if let Err(e) = any.write(&mut buffer) {
            panic!("Failed to write message: {:?}, {:?}", any, e);
        }
        // a decode failure is tolerated; only inequality is a bug here
        if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
            assert_eq!(any, any2);
        }
    }
    {
        let any = Any::Array(data);
        let mut buffer = RawEncoder::default();
        if let Err(e) = any.write(&mut buffer) {
            panic!("Failed to write message: {:?}, {:?}", any, e);
        }
        if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
            assert_eq!(any, any2);
        }
    }
});

Some files were not shown because too many files have changed in this diff Show More