fix(server): catch panic for context parsing (#10912)

fix AF-2335
fix CLOUD-173
This commit is contained in:
darkskygit
2025-03-17 09:44:57 +00:00
parent b401012d85
commit 92db9a693a
4 changed files with 129 additions and 71 deletions

164
Cargo.lock generated
View File

@@ -26,6 +26,17 @@ dependencies = [
"pom",
]
[[package]]
name = "aes"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "affine_common"
version = "0.1.0"
@@ -477,6 +488,15 @@ dependencies = [
"generic-array",
]
[[package]]
name = "block-padding"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
dependencies = [
"generic-array",
]
[[package]]
name = "block2"
version = "0.6.0"
@@ -518,6 +538,12 @@ version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytecount"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
[[package]]
name = "bytemuck"
version = "1.22.0"
@@ -574,6 +600,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cbc"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
dependencies = [
"cipher",
]
[[package]]
name = "cc"
version = "1.2.16"
@@ -589,7 +624,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
"nom 7.1.3",
]
[[package]]
@@ -671,6 +706,16 @@ dependencies = [
"half",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
@@ -1015,15 +1060,6 @@ dependencies = [
"zeroize",
]
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.1"
@@ -1146,6 +1182,15 @@ version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055"
[[package]]
name = "ecb"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a8bfa975b1aec2145850fcaa1c6fe269a16578c44705a532ae3edc92b8881c7"
dependencies = [
"cipher",
]
[[package]]
name = "either"
version = "1.15.0"
@@ -1825,6 +1870,16 @@ dependencies = [
"cfb",
]
[[package]]
name = "inout"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
dependencies = [
"block-padding",
"generic-array",
]
[[package]]
name = "io-surface"
version = "0.16.0"
@@ -2015,19 +2070,27 @@ dependencies = [
[[package]]
name = "lopdf"
version = "0.34.0"
version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5c8ecfc6c72051981c0459f75ccc585e7ff67c70829560cda8e647882a9abff"
checksum = "59fa2559e99ba0f26a12458aabc754432c805bbb8cba516c427825a997af1fb7"
dependencies = [
"aes",
"bitflags 2.9.0",
"cbc",
"ecb",
"encoding_rs",
"flate2",
"indexmap",
"itoa",
"log",
"md-5",
"nom",
"nom 8.0.0",
"nom_locate",
"rand 0.9.0",
"rangemap",
"time",
"sha2",
"stringprep",
"thiserror 2.0.12",
"weezl",
]
@@ -2263,6 +2326,26 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "nom_locate"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d"
dependencies = [
"bytecount",
"memchr",
"nom 8.0.0",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -2299,12 +2382,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
@@ -2513,13 +2590,13 @@ checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42"
[[package]]
name = "pdf-extract"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87aa267a18864f2f75471f6d316ea430f13e78f0b5a882ce261ebbdfd389a76a"
source = "git+https://github.com/toeverything/pdf-extract#49ef7d2aec5bb495467a40082cd9717e849ee29a"
dependencies = [
"adobe-cmap-parser",
"cff-parser",
"encoding_rs",
"euclid",
"log",
"lopdf",
"postscript",
"type1-encoding-parser",
@@ -2655,12 +2732,6 @@ version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.20"
@@ -2949,7 +3020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f9a866e2e00a7a1fb27e46e9e324a6f7c0e7edc4543cae1d38f4e4a100c610"
dependencies = [
"memchr",
"nom",
"nom 7.1.3",
"serde",
]
@@ -3986,37 +4057,6 @@ dependencies = [
"rustc-hash 1.1.0",
]
[[package]]
name = "time"
version = "0.3.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dad298b01a40a23aac4580b67e3dbedb7cc8402f3592d7f49469de2ea4aecdd8"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "765c97a5b985b7c11d7bc27fa927dc4fe6af3a6dfb021d28deb60d3bf51e76ef"
[[package]]
name = "time-macros"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8093bc3e81c3bc5f7879de09619d06c9a5a5e45ca44dfeeb7225bae38005c5c"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.7.6"
@@ -4662,7 +4702,7 @@ version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998d2c24ec099a87daf9467808859f9d82b61f1d9c9701251aea037f514eae0e"
dependencies = [
"nom",
"nom 7.1.3",
]
[[package]]
@@ -5068,7 +5108,7 @@ dependencies = [
"log",
"loom",
"nanoid",
"nom",
"nom 7.1.3",
"ordered-float",
"rand 0.8.5",
"rand_chacha 0.3.1",

View File

@@ -41,7 +41,7 @@ objc2-foundation = "0.3"
once_cell = "1"
parking_lot = "0.12"
path-ext = "0.1.1"
pdf-extract = "0.8.2"
pdf-extract = { git = "https://github.com/toeverything/pdf-extract" }
rand = "0.9"
rayon = "1.10"
readability = { version = "0.3.0", default-features = false }

View File

@@ -10,6 +10,7 @@ import {
Config,
EventBus,
JobQueue,
mapAnyError,
OnEvent,
OnJob,
} from '../../../base';
@@ -160,10 +161,11 @@ export class CopilotContextDocJob implements OnModuleInit {
chunkSize: total,
});
} catch (e: any) {
this.logger.error(
`Failed to embed pending file: ${contextId}::${fileId}`,
e
);
const error = mapAnyError(e);
error.log('CopilotJob', {
workspaceId,
fileId,
});
this.event.emit('workspace.file.embed.failed', {
contextId,

View File

@@ -1,4 +1,8 @@
use std::{io::Cursor, path::PathBuf};
use std::{
io::Cursor,
panic::{catch_unwind, AssertUnwindSafe},
path::PathBuf,
};
use path_ext::PathExt;
@@ -81,16 +85,28 @@ impl Doc {
// NOTE(review): this hunk is rendered without +/- markers, so the superseded and the
// replacement lines appear side by side (two `loader:` parameters, two `let chunks`
// statements). Confirm against the raw diff which of each pair is current.
/// Builds a `Doc` named after `file_path` from the chunks produced by `loader` and `splitter`.
///
/// The chunking call runs inside `catch_unwind`, so a panic raised while loading is returned
/// as `LoaderError::Other` carrying the panic message instead of unwinding into the caller.
///
/// # Errors
///
/// Returns the `LoaderError` reported by `get_chunks_from_loader`, or `LoaderError::Other`
/// when that call panicked.
fn from_loader(
file_path: &str,
loader: impl Loader,
loader: impl Loader + 'static,
splitter: impl TextSplitter + 'static,
) -> Result<Doc, LoaderError> {
let name = file_path.to_string();
let chunks = Self::get_chunks_from_loader(loader, splitter)?;
// `loader` and `splitter` are moved into the call, so nothing they own is observed here
// after a panic; that is what makes wrapping the closure in `AssertUnwindSafe` reasonable.
// NOTE(review): `catch_unwind` only intercepts unwinding panics — confirm the crate is not
// built with `panic = "abort"`.
let chunks = catch_unwind(AssertUnwindSafe(|| {
Self::get_chunks_from_loader(loader, splitter)
}))
.map_err(|e| {
// The panic payload is type-erased: try an owned `String` message first, then a
// `&str` message, and fall back to a fixed text for any other payload type.
LoaderError::Other(match e.downcast::<String>() {
Ok(v) => *v,
Err(e) => match e.downcast::<&str>() {
Ok(v) => v.to_string(),
_ => "Unknown Source of Error".to_owned(),
},
})
// The first `?` propagates a converted panic, the second the loader's own `LoaderError`.
})??;
Ok(Self { name, chunks })
}
fn get_chunks_from_loader(
loader: impl Loader,
loader: impl Loader + 'static,
splitter: impl TextSplitter + 'static,
) -> Result<Vec<Chunk>, LoaderError> {
let docs = loader.load_and_split(splitter)?;