mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-04 00:28:33 +00:00
feat: native doc reader (#13881)
This commit is contained in:
@@ -5,7 +5,14 @@ rustflags = ["-C", "target-feature=+crt-static"]
|
||||
[target.'cfg(target_os = "linux")']
|
||||
rustflags = ["-C", "link-args=-Wl,--warn-unresolved-symbols"]
|
||||
[target.'cfg(target_os = "macos")']
|
||||
rustflags = ["-C", "link-args=-Wl,-undefined,dynamic_lookup,-no_fixup_chains", "-C", "link-args=-all_load", "-C", "link-args=-weak_framework ScreenCaptureKit"]
|
||||
rustflags = [
|
||||
"-C",
|
||||
"link-args=-Wl,-undefined,dynamic_lookup,-no_fixup_chains",
|
||||
"-C",
|
||||
"link-args=-all_load",
|
||||
"-C",
|
||||
"link-args=-weak_framework ScreenCaptureKit",
|
||||
]
|
||||
# https://sourceware.org/bugzilla/show_bug.cgi?id=21032
|
||||
# https://sourceware.org/bugzilla/show_bug.cgi?id=21031
|
||||
# https://github.com/rust-lang/rust/issues/134820
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
include = ["./*.toml", "./packages/**/*.toml"]
|
||||
exclude = ["node_modules/**/*.toml", "target/**/*.toml"]
|
||||
|
||||
# https://taplo.tamasfe.dev/configuration/formatter-options.html
|
||||
[formatting]
|
||||
align_entries = true
|
||||
column_width = 180
|
||||
reorder_arrays = true
|
||||
indent_tables = true
|
||||
reorder_keys = true
|
||||
|
||||
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -40,6 +40,7 @@ dependencies = [
|
||||
name = "affine_common"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert-json-diff",
|
||||
"cc",
|
||||
"chrono",
|
||||
"criterion2",
|
||||
@@ -50,6 +51,7 @@ dependencies = [
|
||||
"rand 0.9.1",
|
||||
"rayon",
|
||||
"readability",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha3",
|
||||
"strum_macros",
|
||||
@@ -69,6 +71,7 @@ dependencies = [
|
||||
"tree-sitter-scala",
|
||||
"tree-sitter-typescript",
|
||||
"url",
|
||||
"y-octo",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
215
Cargo.toml
215
Cargo.toml
@@ -13,109 +13,122 @@ members = [
|
||||
]
|
||||
resolver = "3"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
|
||||
[workspace.dependencies]
|
||||
affine_common = { path = "./packages/common/native" }
|
||||
affine_nbstore = { path = "./packages/frontend/native/nbstore" }
|
||||
ahash = "0.8"
|
||||
anyhow = "1"
|
||||
arbitrary = { version = "1.3", features = ["derive"] }
|
||||
assert-json-diff = "2.0"
|
||||
async-lock = { version = "3.4.0", features = ["loom"] }
|
||||
base64-simd = "0.8"
|
||||
bitvec = "1.0"
|
||||
block2 = "0.6"
|
||||
byteorder = "1.5"
|
||||
cpal = "0.15"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4.4", features = ["derive"] }
|
||||
core-foundation = "0.10"
|
||||
coreaudio-rs = "0.12"
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
criterion2 = { version = "3", default-features = false }
|
||||
crossbeam-channel = "0.5"
|
||||
dispatch2 = "0.3"
|
||||
docx-parser = { git = "https://github.com/toeverything/docx-parser" }
|
||||
dotenvy = "0.15"
|
||||
file-format = { version = "0.28", features = ["reader"] }
|
||||
homedir = "0.3"
|
||||
infer = { version = "0.19.0" }
|
||||
lasso = { version = "0.7", features = ["multi-threaded"] }
|
||||
lib0 = { version = "0.16", features = ["lib0-serde"] }
|
||||
libc = "0.2"
|
||||
log = "0.4"
|
||||
loom = { version = "0.7", features = ["checkpoint"] }
|
||||
mimalloc = "0.1"
|
||||
mp4parse = "0.17"
|
||||
nanoid = "0.4"
|
||||
napi = { version = "3.0.0-beta.3", features = ["async", "chrono_date", "error_anyhow", "napi9", "serde"] }
|
||||
napi-build = { version = "2" }
|
||||
napi-derive = { version = "3.0.0-beta.3" }
|
||||
nom = "8"
|
||||
notify = { version = "8", features = ["serde"] }
|
||||
objc2 = "0.6"
|
||||
objc2-foundation = "0.3"
|
||||
once_cell = "1"
|
||||
ordered-float = "5"
|
||||
parking_lot = "0.12"
|
||||
path-ext = "0.1.2"
|
||||
pdf-extract = { git = "https://github.com/toeverything/pdf-extract", branch = "darksky/improve-font-decoding" }
|
||||
phf = { version = "0.11", features = ["macros"] }
|
||||
proptest = "1.3"
|
||||
proptest-derive = "0.5"
|
||||
rand = "0.9"
|
||||
rand_chacha = "0.9"
|
||||
rand_distr = "0.5"
|
||||
rayon = "1.10"
|
||||
readability = { version = "0.3.0", default-features = false }
|
||||
regex = "1.10"
|
||||
rubato = "0.16"
|
||||
screencapturekit = "0.3"
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
sha3 = "0.10"
|
||||
smol_str = "0.3"
|
||||
sqlx = { version = "0.8", default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
strum_macros = "0.27.0"
|
||||
symphonia = { version = "0.5", features = ["all", "opt-simd"] }
|
||||
text-splitter = "0.27"
|
||||
thiserror = "2"
|
||||
tiktoken-rs = "0.7"
|
||||
tokio = "1.45"
|
||||
tree-sitter = { version = "0.25" }
|
||||
tree-sitter-c = { version = "0.24" }
|
||||
tree-sitter-c-sharp = { version = "0.23" }
|
||||
tree-sitter-cpp = { version = "0.23" }
|
||||
tree-sitter-go = { version = "0.23" }
|
||||
tree-sitter-java = { version = "0.23" }
|
||||
tree-sitter-javascript = { version = "0.23" }
|
||||
tree-sitter-kotlin-ng = { version = "1.1" }
|
||||
tree-sitter-python = { version = "0.23" }
|
||||
tree-sitter-rust = { version = "0.24" }
|
||||
tree-sitter-scala = { version = "0.24" }
|
||||
tree-sitter-typescript = { version = "0.23" }
|
||||
uniffi = "0.29"
|
||||
url = { version = "2.5" }
|
||||
uuid = "1.8"
|
||||
v_htmlescape = "0.15"
|
||||
windows = { version = "0.61", features = [
|
||||
[workspace.dependencies]
|
||||
affine_common = { path = "./packages/common/native" }
|
||||
affine_nbstore = { path = "./packages/frontend/native/nbstore" }
|
||||
ahash = "0.8"
|
||||
anyhow = "1"
|
||||
arbitrary = { version = "1.3", features = ["derive"] }
|
||||
assert-json-diff = "2.0"
|
||||
async-lock = { version = "3.4.0", features = ["loom"] }
|
||||
base64-simd = "0.8"
|
||||
bitvec = "1.0"
|
||||
block2 = "0.6"
|
||||
byteorder = "1.5"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4.4", features = ["derive"] }
|
||||
core-foundation = "0.10"
|
||||
coreaudio-rs = "0.12"
|
||||
cpal = "0.15"
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
criterion2 = { version = "3", default-features = false }
|
||||
crossbeam-channel = "0.5"
|
||||
dispatch2 = "0.3"
|
||||
docx-parser = { git = "https://github.com/toeverything/docx-parser" }
|
||||
dotenvy = "0.15"
|
||||
file-format = { version = "0.28", features = ["reader"] }
|
||||
homedir = "0.3"
|
||||
infer = { version = "0.19.0" }
|
||||
lasso = { version = "0.7", features = ["multi-threaded"] }
|
||||
lib0 = { version = "0.16", features = ["lib0-serde"] }
|
||||
libc = "0.2"
|
||||
log = "0.4"
|
||||
loom = { version = "0.7", features = ["checkpoint"] }
|
||||
mimalloc = "0.1"
|
||||
mp4parse = "0.17"
|
||||
nanoid = "0.4"
|
||||
napi = { version = "3.0.0-beta.3", features = [
|
||||
"async",
|
||||
"chrono_date",
|
||||
"error_anyhow",
|
||||
"napi9",
|
||||
"serde",
|
||||
] }
|
||||
napi-build = { version = "2" }
|
||||
napi-derive = { version = "3.0.0-beta.3" }
|
||||
nom = "8"
|
||||
notify = { version = "8", features = ["serde"] }
|
||||
objc2 = "0.6"
|
||||
objc2-foundation = "0.3"
|
||||
once_cell = "1"
|
||||
ordered-float = "5"
|
||||
parking_lot = "0.12"
|
||||
path-ext = "0.1.2"
|
||||
pdf-extract = { git = "https://github.com/toeverything/pdf-extract", branch = "darksky/improve-font-decoding" }
|
||||
phf = { version = "0.11", features = ["macros"] }
|
||||
proptest = "1.3"
|
||||
proptest-derive = "0.5"
|
||||
rand = "0.9"
|
||||
rand_chacha = "0.9"
|
||||
rand_distr = "0.5"
|
||||
rayon = "1.10"
|
||||
readability = { version = "0.3.0", default-features = false }
|
||||
regex = "1.10"
|
||||
rubato = "0.16"
|
||||
screencapturekit = "0.3"
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
sha3 = "0.10"
|
||||
smol_str = "0.3"
|
||||
sqlx = { version = "0.8", default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
strum_macros = "0.27.0"
|
||||
symphonia = { version = "0.5", features = ["all", "opt-simd"] }
|
||||
text-splitter = "0.27"
|
||||
thiserror = "2"
|
||||
tiktoken-rs = "0.7"
|
||||
tokio = "1.45"
|
||||
tree-sitter = { version = "0.25" }
|
||||
tree-sitter-c = { version = "0.24" }
|
||||
tree-sitter-c-sharp = { version = "0.23" }
|
||||
tree-sitter-cpp = { version = "0.23" }
|
||||
tree-sitter-go = { version = "0.23" }
|
||||
tree-sitter-java = { version = "0.23" }
|
||||
tree-sitter-javascript = { version = "0.23" }
|
||||
tree-sitter-kotlin-ng = { version = "1.1" }
|
||||
tree-sitter-python = { version = "0.23" }
|
||||
tree-sitter-rust = { version = "0.24" }
|
||||
tree-sitter-scala = { version = "0.24" }
|
||||
tree-sitter-typescript = { version = "0.23" }
|
||||
uniffi = "0.29"
|
||||
url = { version = "2.5" }
|
||||
uuid = "1.8"
|
||||
v_htmlescape = "0.15"
|
||||
windows = { version = "0.61", features = [
|
||||
"Win32_Devices_FunctionDiscovery",
|
||||
"Win32_UI_Shell_PropertiesSystem",
|
||||
"Win32_Media_Audio",
|
||||
"Win32_System_Variant",
|
||||
"Win32_System_Com_StructuredStorage",
|
||||
"Win32_System_Threading",
|
||||
"Win32_System_ProcessStatus",
|
||||
"Win32_Foundation",
|
||||
"Win32_Media_Audio",
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Com_StructuredStorage",
|
||||
"Win32_System_Diagnostics_ToolHelp",
|
||||
] }
|
||||
windows-core = { version = "0.61" }
|
||||
y-octo = { path = "./packages/common/y-octo/core" }
|
||||
y-sync = { version = "0.4" }
|
||||
yrs = "0.23.0"
|
||||
"Win32_System_ProcessStatus",
|
||||
"Win32_System_Threading",
|
||||
"Win32_System_Variant",
|
||||
"Win32_UI_Shell_PropertiesSystem",
|
||||
] }
|
||||
windows-core = { version = "0.61" }
|
||||
y-octo = { path = "./packages/common/y-octo/core" }
|
||||
y-sync = { version = "0.4" }
|
||||
yrs = "0.23.0"
|
||||
|
||||
[profile.dev.package.sqlx-macros]
|
||||
opt-level = 3
|
||||
@@ -126,6 +139,6 @@ lto = true
|
||||
opt-level = 3
|
||||
strip = "symbols"
|
||||
|
||||
# android uniffi bindgen requires symbols
|
||||
[profile.release.package.affine_mobile_native]
|
||||
strip = "none"
|
||||
# android uniffi bindgen requires symbols
|
||||
[profile.release.package.affine_mobile_native]
|
||||
strip = "none"
|
||||
|
||||
@@ -4,8 +4,22 @@ name = "affine_common"
|
||||
version = "0.1.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
doc-loader = ["docx-parser", "infer", "path-ext", "pdf-extract", "readability", "serde_json", "strum_macros", "text-splitter", "thiserror", "tree-sitter", "url"]
|
||||
default = ["hashcash"]
|
||||
doc-loader = [
|
||||
"docx-parser",
|
||||
"infer",
|
||||
"path-ext",
|
||||
"pdf-extract",
|
||||
"readability",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strum_macros",
|
||||
"text-splitter",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
"url",
|
||||
]
|
||||
hashcash = ["sha3", "rand"]
|
||||
tree-sitter = [
|
||||
"cc",
|
||||
"dep:tree-sitter",
|
||||
@@ -21,20 +35,26 @@ tree-sitter = [
|
||||
"dep:tree-sitter-scala",
|
||||
"dep:tree-sitter-typescript",
|
||||
]
|
||||
ydoc-loader = ["assert-json-diff", "y-octo"]
|
||||
|
||||
[dependencies]
|
||||
chrono = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
sha3 = { workspace = true }
|
||||
rand = { workspace = true, optional = true }
|
||||
sha3 = { workspace = true, optional = true }
|
||||
|
||||
assert-json-diff = { workspace = true, optional = true }
|
||||
docx-parser = { workspace = true, optional = true }
|
||||
infer = { workspace = true, optional = true }
|
||||
path-ext = { workspace = true, optional = true }
|
||||
pdf-extract = { workspace = true, optional = true }
|
||||
readability = { workspace = true, optional = true, default-features = false }
|
||||
serde = { workspace = true, optional = true, features = ["derive"] }
|
||||
serde_json = { workspace = true, optional = true }
|
||||
strum_macros = { workspace = true, optional = true }
|
||||
text-splitter = { workspace = true, features = ["markdown", "tiktoken-rs"], optional = true }
|
||||
text-splitter = { workspace = true, features = [
|
||||
"markdown",
|
||||
"tiktoken-rs",
|
||||
], optional = true }
|
||||
thiserror = { workspace = true, optional = true }
|
||||
tree-sitter = { workspace = true, optional = true }
|
||||
tree-sitter-c = { workspace = true, optional = true }
|
||||
@@ -49,6 +69,7 @@ tree-sitter-rust = { workspace = true, optional = true }
|
||||
tree-sitter-scala = { workspace = true, optional = true }
|
||||
tree-sitter-typescript = { workspace = true, optional = true }
|
||||
url = { workspace = true, optional = true }
|
||||
y-octo = { workspace = true, optional = true }
|
||||
|
||||
tiktoken-rs = { workspace = true }
|
||||
|
||||
|
||||
BIN
packages/common/native/fixtures/demo.ydoc
Normal file
BIN
packages/common/native/fixtures/demo.ydoc
Normal file
Binary file not shown.
567
packages/common/native/fixtures/demo.ydoc.json
Normal file
567
packages/common/native/fixtures/demo.ydoc.json
Normal file
@@ -0,0 +1,567 @@
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"block_id": "F_zl1z0ex6dSxM25ZBUWk",
|
||||
"flavour": "affine:page",
|
||||
"content": [
|
||||
"Write, Draw, Plan all at Once."
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": null,
|
||||
"parent_block_id": null,
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "U8g2Edcy8rSmu_XUpYSTz",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro. "
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "NxvF6M1Zo-TrUoJyFbMy4",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "VHVvola5BtXnC7FMaGzE7",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"You own your data, with no compromises"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "Hvbw1lSZ7Bl-LMiOc_xFb",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Local-first & Real-time collaborative"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "2N8pJ32byPEc7dlJv-0cC",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"We love the idea proposed by Ink & Switch in the famous article about you owning your data, despite the cloud. Furthermore, AFFiNE is the first all-in-one workspace that keeps your data ownership with no compromises on real-time collaboration and editing experience."
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "rysuvULJQgFPRmNB_iTk3",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"AFFiNE is a local-first application upon CRDTs with real-time collaboration support. Your data is always stored locally while multiple nodes remain synced in real-time."
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "zbvmiGhuxn0_tfJ_e4N2V",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "omG6fe87r60dPxCtv_Odb",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"omG6fe87r60dPxCtv_Odb\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "CQeQaaREQwGDXg9oAng4q",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Blocks that assemble your next docs, tasks kanban or whiteboard"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "QqLfbOIINoAxapnB23E5t",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"QqLfbOIINoAxapnB23E5t\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "RTtZAHZG4xoAXKKnEsRhB",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"There is a large overlap of their atomic \"building blocks\" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further. "
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "Fpxp7rZCad7-W51xCWta1",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"Fpxp7rZCad7-W51xCWta1\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "c_KFpzTBHdioeoIsnLIjd",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"We are building AFFiNE to be a fundamental open source platform that contains all the building blocks for docs, task management and visual collaboration, hoping you can shape your next workflow with us that can make your life better and also connect others, too."
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "Fpxp7rZCad7-W51xCWta1",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"Fpxp7rZCad7-W51xCWta1\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "O6D7PxXwCbfCLp1_peft1",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"If you want to learn more about the product design of AFFiNE, here goes the concepts:"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "Fpxp7rZCad7-W51xCWta1",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"Fpxp7rZCad7-W51xCWta1\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "os84XIVFN6z6aN9tq16S5",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"To Shape, not to adapt. AFFiNE is built for individuals & teams who care about their data, who refuse vendor lock-in, and who want to have control over their essential tools."
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "Fpxp7rZCad7-W51xCWta1",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"Fpxp7rZCad7-W51xCWta1\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "8jk1eREgtKdhwtee0nym-",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"A true canvas for blocks in any form"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "kDyc-RntYCDWSSYFl4SSn",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Many editor apps claimed to be a canvas for productivity. Since the Mother of All Demos, Douglas Engelbart, a creative and programable digital workspace has been a pursuit and an ultimate mission for generations of tool makers. "
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "k0e4IqfQ2PRmsXjzDQ-3T",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "QR-4T-qYH3gTbbo_VKjU-",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"\"We shape our tools and thereafter our tools shape us”. A lot of pioneers have inspired us a long the way, e.g.:"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "nyEEsW01cCQVIr7yG9ywg",
|
||||
"flavour": "affine:list",
|
||||
"content": [
|
||||
"Quip & Notion with their great concept of \"everything is a block\""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "ZmeTH3oq8ON10pqV1pQ-n",
|
||||
"flavour": "affine:list",
|
||||
"content": [
|
||||
"Trello with their Kanban"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "SbbdUF95MVoBTlxILHyUL",
|
||||
"flavour": "affine:list",
|
||||
"content": [
|
||||
"Airtable & Miro with their no-code programable datasheets"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "fIQp-yM2v8Iar7kXAooXh",
|
||||
"flavour": "affine:list",
|
||||
"content": [
|
||||
"Miro & Whimiscal with their edgeless visual whiteboard"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "2S9xQwALMBxbas1N6Eyf4",
|
||||
"flavour": "affine:list",
|
||||
"content": [
|
||||
"Remnote & Capacities with their object-based tag system"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "sfycWDfMpgKsE7s5qucWr",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"sfycWDfMpgKsE7s5qucWr\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "C5xu77eIYukdKCu7h0Cjp",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"For more details, please refer to our RoadMap"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "tPAicJ4WMxtBhys0Exlxq",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"tPAicJ4WMxtBhys0Exlxq\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "2goqe7zWXhtKX-pLcYN-h",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Self Host"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "tPAicJ4WMxtBhys0Exlxq",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"tPAicJ4WMxtBhys0Exlxq\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "cBud4or0twRvF36twhwqZ",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Self host AFFiNE"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "tPAicJ4WMxtBhys0Exlxq",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"tPAicJ4WMxtBhys0Exlxq\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"flavour": "affine:database",
|
||||
"content": [
|
||||
"Learning From",
|
||||
"Title",
|
||||
"Tag",
|
||||
"Reference",
|
||||
"Developers",
|
||||
"AFFiNE"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "j9pzdPaGIVH627CPh_mik",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "YJ13TsE8mv5mxy2DIpMOD",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Affine Development"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "T_BOz9f-9uD5QjKABiciE",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"For developers or installations guides, please go to AFFiNE Doc"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "yiMJW4ASGVUC7mly7ulce",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Quip & Notion with their great concept of \"everything is a block\""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "qug1UtELLsBg2XwU1IJYK",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Trello with their Kanban"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "p86Sceg7KYFe1OgUUswCM",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Airtable & Miro with their no-code programable datasheets"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "ahyiL8RAajMoIoDekk2Je",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Miro & Whimiscal with their edgeless visual whiteboard"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "pKh_LNdwWD-yLDh-TVYAz",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Remnote & Capacities with their object-based tag system"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:database",
|
||||
"parent_block_id": "FB07QqbHZhRqj43IEOzJP",
|
||||
"additional": "{\"databaseName\":\"Learning From\",\"displayMode\":\"page\",\"noteBlockId\":\"j9pzdPaGIVH627CPh_mik\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "b0ftuFAo7qDYRiI18TN6b",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"Affine Development"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "wyVFKrydUAthKQEML84vE",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"wyVFKrydUAthKQEML84vE\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "64rS_MtVI-f_MpE6qCbls",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
"For developer or installation guides, please go to AFFiNE Development"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "wyVFKrydUAthKQEML84vE",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"wyVFKrydUAthKQEML84vE\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "7dotDICZwPWNvFXZSh-oF",
|
||||
"flavour": "affine:paragraph",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:note",
|
||||
"parent_block_id": "wyVFKrydUAthKQEML84vE",
|
||||
"additional": "{\"displayMode\":\"page\",\"noteBlockId\":\"wyVFKrydUAthKQEML84vE\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"flavour": "affine:surface",
|
||||
"content": [
|
||||
"",
|
||||
" ",
|
||||
"AFFiNE ",
|
||||
"Database Reference",
|
||||
"Development",
|
||||
"Related Articles",
|
||||
"Self-host",
|
||||
"What is AFFiNE",
|
||||
"You can check these URLs to learn about AFFiNE"
|
||||
],
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:page",
|
||||
"parent_block_id": "F_zl1z0ex6dSxM25ZBUWk",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "YkZXI2iWRvQARfQPqspP_",
|
||||
"flavour": "affine:bookmark",
|
||||
"content": null,
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:surface",
|
||||
"parent_block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "6xwR75KDkxvygEFWZfmhZ",
|
||||
"flavour": "affine:bookmark",
|
||||
"content": null,
|
||||
"blob": null,
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:surface",
|
||||
"parent_block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "AjY4OYuyp2536pBZlf-vD",
|
||||
"flavour": "affine:image",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": [
|
||||
"BFZk3c2ERp-sliRvA7MQ_p3NdkdCLt2Ze0DQ9i21dpA="
|
||||
],
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:surface",
|
||||
"parent_block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "ErTXTP8VqP5fabuB5FZHH",
|
||||
"flavour": "affine:image",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": [
|
||||
"HWvCItS78DzPGbwcuaGcfkpVDUvL98IvH5SIK8-AcL8="
|
||||
],
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:surface",
|
||||
"parent_block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
},
|
||||
{
|
||||
"block_id": "hMg8RXNlkD7XJS9b27fGA",
|
||||
"flavour": "affine:image",
|
||||
"content": [
|
||||
""
|
||||
],
|
||||
"blob": [
|
||||
"ZRKpsBoC88qEMmeiXKXqywfA1rLvWoLa5rpEh9x9Oj0="
|
||||
],
|
||||
"ref_doc_id": null,
|
||||
"ref_info": null,
|
||||
"parent_flavour": "affine:surface",
|
||||
"parent_block_id": "Jn4N8YdsFnqjBn-ae5zeR",
|
||||
"additional": "{\"displayMode\":\"edgeless\"}"
|
||||
}
|
||||
],
|
||||
"title": "Write, Draw, Plan all at Once.",
|
||||
"summary": "AFFiNE is an open source all in one workspace, an operating system for all the building blocks of your team wiki, knowledge management and digital assets and a better alternative to Notion and Miro. You own your data, with no compromisesLocal-first & Real-time collaborativeWe love the idea proposed by Ink & Switch in the famous article about you owning your data, despite the cloud. Furthermore, AFFiNE is the first all-in-one workspace that keeps your data ownership with no compromises on real-time collaboration and editing experience.AFFiNE is a local-first application upon CRDTs with real-time collaboration support. Your data is always stored locally while multiple nodes remain synced in real-time.Blocks that assemble your next docs, tasks kanban or whiteboardThere is a large overlap of their atomic \"building blocks\" between these apps. They are neither open source nor have a plugin system like VS Code for contributors to customize. We want to have something that contains all the features we love and goes one step further. "
|
||||
}
|
||||
528
packages/common/native/src/doc_parser.rs
Normal file
528
packages/common/native/src/doc_parser.rs
Normal file
@@ -0,0 +1,528 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map as JsonMap, Value as JsonValue};
|
||||
use thiserror::Error;
|
||||
use y_octo::{Any, DocOptions, JwstCodecError, Map, Value};
|
||||
|
||||
const SUMMARY_LIMIT: usize = 1000;
|
||||
const PAGE_FLAVOUR: &str = "affine:page";
|
||||
const NOTE_FLAVOUR: &str = "affine:note";
|
||||
|
||||
const BOOKMARK_FLAVOURS: [&str; 5] = [
|
||||
"affine:bookmark",
|
||||
"affine:embed-youtube",
|
||||
"affine:embed-figma",
|
||||
"affine:embed-github",
|
||||
"affine:embed-loom",
|
||||
];
|
||||
|
||||
/// Input accepted by [`parse_doc_from_binary`].
#[derive(Debug, Clone)]
pub struct CrawlDocInput {
  /// Binary update of the document to parse; it is applied with
  /// `apply_update_from_binary_v1`. An empty buffer is rejected as
  /// `ParseError::InvalidBinary`.
  pub doc_bin: Vec<u8>,
  /// Optional binary of the root document. `parse_doc_from_binary` currently
  /// ignores this field.
  pub root_doc_bin: Option<Vec<u8>>,
  /// Identifier of the owning space. `parse_doc_from_binary` currently ignores
  /// this field.
  pub space_id: String,
  /// Identifier of the document; used as the guid of the doc the update is
  /// applied to.
  pub doc_id: String,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BlockInfo {
|
||||
pub block_id: String,
|
||||
pub flavour: String,
|
||||
pub content: Option<Vec<String>>,
|
||||
pub blob: Option<Vec<String>>,
|
||||
pub ref_doc_id: Option<Vec<String>>,
|
||||
pub ref_info: Option<Vec<String>>,
|
||||
pub parent_flavour: Option<String>,
|
||||
pub parent_block_id: Option<String>,
|
||||
pub additional: Option<String>,
|
||||
}
|
||||
|
||||
impl BlockInfo {
|
||||
fn base(
|
||||
block_id: &str,
|
||||
flavour: &str,
|
||||
parent_flavour: Option<&String>,
|
||||
parent_block_id: Option<&String>,
|
||||
additional: Option<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
block_id: block_id.to_string(),
|
||||
flavour: flavour.to_string(),
|
||||
content: None,
|
||||
blob: None,
|
||||
ref_doc_id: None,
|
||||
ref_info: None,
|
||||
parent_flavour: parent_flavour.cloned(),
|
||||
parent_block_id: parent_block_id.cloned(),
|
||||
additional,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrawlResult {
|
||||
pub blocks: Vec<BlockInfo>,
|
||||
pub title: String,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
#[derive(Error, Debug, Serialize, Deserialize)]
|
||||
pub enum ParseError {
|
||||
#[error("doc_not_found")]
|
||||
DocNotFound,
|
||||
#[error("invalid_binary")]
|
||||
InvalidBinary,
|
||||
#[error("sqlite_error: {0}")]
|
||||
SqliteError(String),
|
||||
#[error("parser_error: {0}")]
|
||||
ParserError(String),
|
||||
#[error("unknown: {0}")]
|
||||
Unknown(String),
|
||||
}
|
||||
|
||||
impl From<JwstCodecError> for ParseError {
|
||||
fn from(value: JwstCodecError) -> Self {
|
||||
Self::ParserError(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_doc_from_binary(input: CrawlDocInput) -> Result<CrawlResult, ParseError> {
|
||||
let CrawlDocInput {
|
||||
doc_bin,
|
||||
root_doc_bin: _,
|
||||
space_id: _,
|
||||
doc_id,
|
||||
} = input;
|
||||
|
||||
if doc_bin.is_empty() {
|
||||
return Err(ParseError::InvalidBinary);
|
||||
}
|
||||
|
||||
let mut doc = DocOptions::new().with_guid(doc_id.clone()).build();
|
||||
doc
|
||||
.apply_update_from_binary_v1(&doc_bin)
|
||||
.map_err(|_| ParseError::InvalidBinary)?;
|
||||
|
||||
let blocks_map = doc.get_map("blocks")?;
|
||||
if blocks_map.is_empty() {
|
||||
return Err(ParseError::ParserError("blocks map is empty".into()));
|
||||
}
|
||||
|
||||
let mut block_pool: HashMap<String, Map> = HashMap::new();
|
||||
let mut parent_lookup: HashMap<String, String> = HashMap::new();
|
||||
|
||||
for (_, value) in blocks_map.iter() {
|
||||
if let Some(block_map) = value.to_map() {
|
||||
if let Some(block_id) = get_block_id(&block_map) {
|
||||
for child_id in collect_child_ids(&block_map) {
|
||||
parent_lookup.insert(child_id, block_id.clone());
|
||||
}
|
||||
block_pool.insert(block_id, block_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let root_block_id = block_pool
|
||||
.iter()
|
||||
.find_map(|(id, block)| {
|
||||
get_flavour(block)
|
||||
.filter(|flavour| flavour == PAGE_FLAVOUR)
|
||||
.map(|_| id.clone())
|
||||
})
|
||||
.ok_or_else(|| ParseError::ParserError("root block not found".into()))?;
|
||||
|
||||
let mut queue: Vec<(Option<String>, String)> = vec![(None, root_block_id.clone())];
|
||||
let mut visited: HashSet<String> = HashSet::from([root_block_id.clone()]);
|
||||
let mut blocks: Vec<BlockInfo> = Vec::with_capacity(block_pool.len());
|
||||
let mut doc_title = String::new();
|
||||
let mut summary = String::new();
|
||||
let mut summary_remaining = SUMMARY_LIMIT as isize;
|
||||
|
||||
while let Some((parent_block_id, block_id)) = queue.pop() {
|
||||
let block = match block_pool.get(&block_id) {
|
||||
Some(block) => block,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let flavour = match get_flavour(block) {
|
||||
Some(flavour) => flavour,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let parent_block = parent_block_id.as_ref().and_then(|id| block_pool.get(id));
|
||||
let parent_flavour = parent_block.and_then(get_flavour);
|
||||
|
||||
let note_block = nearest_by_flavour(&block_id, NOTE_FLAVOUR, &parent_lookup, &block_pool);
|
||||
let note_block_id = note_block.as_ref().and_then(get_block_id);
|
||||
let display_mode = determine_display_mode(note_block.as_ref());
|
||||
|
||||
// enqueue children first to keep traversal order similar to JS implementation
|
||||
let mut child_ids = collect_child_ids(block);
|
||||
for child_id in child_ids.drain(..).rev() {
|
||||
if visited.insert(child_id.clone()) {
|
||||
queue.push((Some(block_id.clone()), child_id));
|
||||
}
|
||||
}
|
||||
|
||||
let build_block = |database_name: Option<&String>| {
|
||||
BlockInfo::base(
|
||||
&block_id,
|
||||
&flavour,
|
||||
parent_flavour.as_ref(),
|
||||
parent_block_id.as_ref(),
|
||||
compose_additional(&display_mode, note_block_id.as_ref(), database_name),
|
||||
)
|
||||
};
|
||||
|
||||
if flavour == PAGE_FLAVOUR {
|
||||
let title = get_string(block, "prop:title").unwrap_or_default();
|
||||
doc_title = title.clone();
|
||||
let mut info = build_block(None);
|
||||
info.content = Some(vec![title]);
|
||||
blocks.push(info);
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches!(
|
||||
flavour.as_str(),
|
||||
"affine:paragraph" | "affine:list" | "affine:code"
|
||||
) {
|
||||
if let Some((text, text_len)) = text_content(block, "prop:text") {
|
||||
let database_name = if flavour == "affine:paragraph"
|
||||
&& parent_flavour.as_deref() == Some("affine:database")
|
||||
{
|
||||
parent_block.and_then(|map| get_string(map, "prop:title"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut info = build_block(database_name.as_ref());
|
||||
info.content = Some(vec![text.clone()]);
|
||||
blocks.push(info);
|
||||
append_summary(&mut summary, &mut summary_remaining, text_len, &text);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches!(
|
||||
flavour.as_str(),
|
||||
"affine:embed-linked-doc" | "affine:embed-synced-doc"
|
||||
) {
|
||||
if let Some(page_id) = get_string(block, "prop:pageId") {
|
||||
let mut info = build_block(None);
|
||||
info.ref_doc_id = Some(vec![page_id.clone()]);
|
||||
if let Some(payload) = embed_ref_payload(block, &page_id) {
|
||||
info.ref_info = Some(vec![payload]);
|
||||
}
|
||||
blocks.push(info);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:attachment" {
|
||||
if let Some(blob_id) = get_string(block, "prop:sourceId") {
|
||||
let mut info = build_block(None);
|
||||
info.blob = Some(vec![blob_id]);
|
||||
info.content = Some(vec![get_string(block, "prop:name").unwrap_or_default()]);
|
||||
blocks.push(info);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:image" {
|
||||
if let Some(blob_id) = get_string(block, "prop:sourceId") {
|
||||
let mut info = build_block(None);
|
||||
info.blob = Some(vec![blob_id]);
|
||||
info.content = Some(vec![get_string(block, "prop:caption").unwrap_or_default()]);
|
||||
blocks.push(info);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:surface" {
|
||||
let texts = gather_surface_texts(block);
|
||||
let mut info = build_block(None);
|
||||
info.content = Some(texts);
|
||||
blocks.push(info);
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:database" {
|
||||
let (texts, database_name) = gather_database_texts(block);
|
||||
let mut info = BlockInfo::base(
|
||||
&block_id,
|
||||
&flavour,
|
||||
parent_flavour.as_ref(),
|
||||
parent_block_id.as_ref(),
|
||||
compose_additional(
|
||||
&display_mode,
|
||||
note_block_id.as_ref(),
|
||||
database_name.as_ref(),
|
||||
),
|
||||
);
|
||||
info.content = Some(texts);
|
||||
blocks.push(info);
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:latex" {
|
||||
if let Some(content) = get_string(block, "prop:latex") {
|
||||
let mut info = build_block(None);
|
||||
info.content = Some(vec![content]);
|
||||
blocks.push(info);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if flavour == "affine:table" {
|
||||
let contents = gather_table_contents(block);
|
||||
let mut info = build_block(None);
|
||||
info.content = Some(contents);
|
||||
blocks.push(info);
|
||||
continue;
|
||||
}
|
||||
|
||||
if BOOKMARK_FLAVOURS.contains(&flavour.as_str()) {
|
||||
blocks.push(build_block(None));
|
||||
}
|
||||
}
|
||||
|
||||
if doc_title.is_empty() {
|
||||
doc_title = "Untitled".into();
|
||||
}
|
||||
|
||||
Ok(CrawlResult {
|
||||
blocks,
|
||||
title: doc_title,
|
||||
summary,
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_child_ids(block: &Map) -> Vec<String> {
|
||||
block
|
||||
.get("sys:children")
|
||||
.and_then(|value| value.to_array())
|
||||
.map(|array| {
|
||||
array
|
||||
.iter()
|
||||
.filter_map(|value| value_to_string(&value))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn get_block_id(block: &Map) -> Option<String> {
|
||||
get_string(block, "sys:id")
|
||||
}
|
||||
|
||||
fn get_flavour(block: &Map) -> Option<String> {
|
||||
get_string(block, "sys:flavour")
|
||||
}
|
||||
|
||||
fn get_string(block: &Map, key: &str) -> Option<String> {
|
||||
block.get(key).and_then(|value| value_to_string(&value))
|
||||
}
|
||||
|
||||
fn text_content(block: &Map, key: &str) -> Option<(String, usize)> {
|
||||
block.get(key).and_then(|value| {
|
||||
value.to_text().map(|text| {
|
||||
let content = text.to_string();
|
||||
let len = text.len() as usize;
|
||||
(content, len)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn nearest_by_flavour(
|
||||
start: &str,
|
||||
flavour: &str,
|
||||
parent_lookup: &HashMap<String, String>,
|
||||
blocks: &HashMap<String, Map>,
|
||||
) -> Option<Map> {
|
||||
let mut cursor = Some(start.to_string());
|
||||
while let Some(node) = cursor {
|
||||
if let Some(block) = blocks.get(&node) {
|
||||
if get_flavour(block).as_deref() == Some(flavour) {
|
||||
return Some(block.clone());
|
||||
}
|
||||
}
|
||||
cursor = parent_lookup.get(&node).cloned();
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn determine_display_mode(note_block: Option<&Map>) -> String {
|
||||
match note_block.and_then(|block| get_string(block, "prop:displayMode")) {
|
||||
Some(mode) if mode == "both" => "page".into(),
|
||||
Some(mode) => mode,
|
||||
None => "edgeless".into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn compose_additional(
|
||||
display_mode: &str,
|
||||
note_block_id: Option<&String>,
|
||||
database_name: Option<&String>,
|
||||
) -> Option<String> {
|
||||
let mut payload = JsonMap::new();
|
||||
payload.insert(
|
||||
"displayMode".into(),
|
||||
JsonValue::String(display_mode.to_string()),
|
||||
);
|
||||
if let Some(note_id) = note_block_id {
|
||||
payload.insert("noteBlockId".into(), JsonValue::String(note_id.clone()));
|
||||
}
|
||||
if let Some(name) = database_name {
|
||||
payload.insert("databaseName".into(), JsonValue::String(name.clone()));
|
||||
}
|
||||
Some(JsonValue::Object(payload).to_string())
|
||||
}
|
||||
|
||||
fn embed_ref_payload(block: &Map, page_id: &str) -> Option<String> {
|
||||
let mut payload = JsonMap::new();
|
||||
payload.insert("docId".into(), JsonValue::String(page_id.to_string()));
|
||||
|
||||
if let Some(params_value) = block.get("prop:params") {
|
||||
if let Ok(JsonValue::Object(params)) = serde_json::to_value(¶ms_value) {
|
||||
for (key, value) in params.into_iter() {
|
||||
payload.insert(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(JsonValue::Object(payload).to_string())
|
||||
}
|
||||
|
||||
fn gather_surface_texts(block: &Map) -> Vec<String> {
|
||||
let mut texts = Vec::new();
|
||||
let elements = match block.get("prop:elements").and_then(|value| value.to_map()) {
|
||||
Some(map) => map,
|
||||
None => return texts,
|
||||
};
|
||||
|
||||
if elements
|
||||
.get("type")
|
||||
.and_then(|value| value_to_string(&value))
|
||||
.as_deref()
|
||||
!= Some("$blocksuite:internal:native$")
|
||||
{
|
||||
return texts;
|
||||
}
|
||||
|
||||
if let Some(value_map) = elements.get("value").and_then(|value| value.to_map()) {
|
||||
for value in value_map.values() {
|
||||
if let Some(element) = value.to_map() {
|
||||
if let Some(text) = element.get("text").and_then(|value| value.to_text()) {
|
||||
texts.push(text.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
texts.sort();
|
||||
texts
|
||||
}
|
||||
|
||||
fn gather_database_texts(block: &Map) -> (Vec<String>, Option<String>) {
|
||||
let mut texts = Vec::new();
|
||||
let database_title = get_string(block, "prop:title");
|
||||
if let Some(title) = &database_title {
|
||||
texts.push(title.clone());
|
||||
}
|
||||
|
||||
if let Some(columns) = block.get("prop:columns").and_then(|value| value.to_array()) {
|
||||
for column_value in columns.iter() {
|
||||
if let Some(column) = column_value.to_map() {
|
||||
if let Some(name) = get_string(&column, "name") {
|
||||
texts.push(name);
|
||||
}
|
||||
if let Some(data) = column.get("data").and_then(|value| value.to_map()) {
|
||||
if let Some(options) = data.get("options").and_then(|value| value.to_array()) {
|
||||
for option_value in options.iter() {
|
||||
if let Some(option) = option_value.to_map() {
|
||||
if let Some(value) = get_string(&option, "value") {
|
||||
texts.push(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(texts, database_title)
|
||||
}
|
||||
|
||||
fn gather_table_contents(block: &Map) -> Vec<String> {
|
||||
let mut contents = Vec::new();
|
||||
for key in block.keys() {
|
||||
if key.starts_with("prop:cells.") && key.ends_with(".text") {
|
||||
if let Some(value) = block.get(key).and_then(|value| value_to_string(&value)) {
|
||||
if !value.is_empty() {
|
||||
contents.push(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
contents
|
||||
}
|
||||
|
||||
fn value_to_string(value: &Value) -> Option<String> {
|
||||
if let Some(text) = value.to_text() {
|
||||
return Some(text.to_string());
|
||||
}
|
||||
|
||||
if let Some(any) = value.to_any() {
|
||||
return any_to_string(&any);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn any_to_string(any: &Any) -> Option<String> {
|
||||
match any {
|
||||
Any::String(value) => Some(value.to_string()),
|
||||
Any::Integer(value) => Some(value.to_string()),
|
||||
Any::Float32(value) => Some(value.0.to_string()),
|
||||
Any::Float64(value) => Some(value.0.to_string()),
|
||||
Any::BigInt64(value) => Some(value.to_string()),
|
||||
Any::True => Some("true".into()),
|
||||
Any::False => Some("false".into()),
|
||||
Any::Null | Any::Undefined => None,
|
||||
Any::Array(_) | Any::Object(_) | Any::Binary(_) => serde_json::to_string(any).ok(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `text` to `summary` while the budget in `remaining` is positive.
///
/// The whole text is appended even if it is longer than the remaining budget;
/// `remaining` is then reduced by `text_len` and may become negative. Once the
/// budget is zero or negative, further calls do nothing.
fn append_summary(summary: &mut String, remaining: &mut isize, text_len: usize, text: &str) {
  if *remaining <= 0 {
    return;
  }
  *summary += text;
  *remaining -= text_len as isize;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
  use super::*;

  /// Parses the bundled demo document and compares the serialised result
  /// with the expected JSON snapshot stored next to it.
  #[test]
  fn test_parse_doc_from_binary() {
    let parsed = parse_doc_from_binary(CrawlDocInput {
      doc_bin: include_bytes!("../fixtures/demo.ydoc").to_vec(),
      root_doc_bin: None,
      space_id: "o9WCLGyxkLxdULZ-f2B9V".to_string(),
      doc_id: "dYpV7PPhk8amRkY5IAcVO".to_string(),
    })
    .unwrap();

    let expected: serde_json::Value =
      serde_json::from_slice(include_bytes!("../fixtures/demo.ydoc.json")).unwrap();

    // Strict structural comparison, with numbers compared as floats.
    let compare = assert_json_diff::Config::new(assert_json_diff::CompareMode::Strict)
      .numeric_mode(assert_json_diff::NumericMode::AssumeFloat);

    assert_json_diff::assert_json_matches!(expected, serde_json::json!(parsed), compare);
  }
}
|
||||
@@ -1,3 +1,6 @@
|
||||
#[cfg(feature = "doc-loader")]
|
||||
pub mod doc_loader;
|
||||
#[cfg(feature = "ydoc-loader")]
|
||||
pub mod doc_parser;
|
||||
#[cfg(feature = "hashcash")]
|
||||
pub mod hashcash;
|
||||
|
||||
@@ -8,8 +8,8 @@ authors = [
|
||||
description = "High-performance and thread-safe CRDT implementation compatible with Yjs"
|
||||
edition = "2021"
|
||||
homepage = "https://github.com/toeverything/y-octo"
|
||||
include = ["src/**/*", "benches/**/*", "bin/**/*", "LICENSE", "README.md"]
|
||||
keywords = ["collaboration", "crdt", "crdts", "yjs", "yata"]
|
||||
include = ["LICENSE", "README.md", "benches/**/*", "bin/**/*", "src/**/*"]
|
||||
keywords = ["collaboration", "crdt", "crdts", "yata", "yjs"]
|
||||
license = "MIT"
|
||||
name = "y-octo"
|
||||
readme = "README.md"
|
||||
|
||||
@@ -26,7 +26,13 @@ uniffi = { workspace = true, features = ["cli", "tokio"] }
|
||||
|
||||
[target.'cfg(any(target_os = "ios", target_os = "macos"))'.dependencies]
|
||||
objc2 = { workspace = true }
|
||||
objc2-foundation = { workspace = true, features = ["NSArray", "NSFileManager", "NSPathUtilities", "NSString", "NSURL"] }
|
||||
objc2-foundation = { workspace = true, features = [
|
||||
"NSArray",
|
||||
"NSFileManager",
|
||||
"NSPathUtilities",
|
||||
"NSString",
|
||||
"NSURL",
|
||||
] }
|
||||
|
||||
[target.'cfg(not(any(target_os = "ios", target_os = "macos")))'.dependencies]
|
||||
homedir = { workspace = true }
|
||||
|
||||
@@ -14,10 +14,25 @@ affine_sqlite_v1 = { path = "./sqlite_v1" }
|
||||
napi = { workspace = true }
|
||||
napi-derive = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "json", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"json",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
@@ -15,7 +15,14 @@ anyhow = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
napi = { workspace = true }
|
||||
napi-derive = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
@@ -26,5 +33,12 @@ uniffi = { workspace = true }
|
||||
affine_schema = { path = "../schema" }
|
||||
dotenvy = { workspace = true }
|
||||
napi-build = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "json", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
@@ -12,12 +12,26 @@ anyhow = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
napi = { workspace = true }
|
||||
napi-derive = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
[build-dependencies]
|
||||
affine_schema = { path = "../schema" }
|
||||
dotenvy = { workspace = true }
|
||||
napi-build = { workspace = true }
|
||||
sqlx = { workspace = true, default-features = false, features = ["chrono", "json", "macros", "migrate", "runtime-tokio", "sqlite", "tls-rustls"] }
|
||||
sqlx = { workspace = true, default-features = false, features = [
|
||||
"chrono",
|
||||
"macros",
|
||||
"migrate",
|
||||
"runtime-tokio",
|
||||
"sqlite",
|
||||
"tls-rustls",
|
||||
] }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
Reference in New Issue
Block a user