mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 21:05:19 +00:00
feat(server): context awareness for copilot (#9611)
fix PD-2167 fix PD-2169 fix PD-2190
This commit is contained in:
@@ -17,9 +17,11 @@ impl Document {
|
||||
fn chunks(&self, env: Env) -> Result<JsObject> {
|
||||
let mut array = env.create_array_with_length(self.inner.chunks.len())?;
|
||||
for (i, chunk) in self.inner.chunks.iter().enumerate() {
|
||||
let content = crate::utils::clean_content(&chunk.content);
|
||||
|
||||
let mut obj = env.create_object()?;
|
||||
obj.set_named_property("index", i as i64)?;
|
||||
obj.set_named_property("content", chunk.content.clone())?;
|
||||
obj.set_named_property("content", content)?;
|
||||
array.set_element(i as u32, obj)?;
|
||||
}
|
||||
Ok(array)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#![deny(clippy::all)]
|
||||
|
||||
mod utils;
|
||||
|
||||
pub mod doc_loader;
|
||||
pub mod file_type;
|
||||
pub mod hashcash;
|
||||
|
||||
111
packages/backend/native/src/utils.rs
Normal file
111
packages/backend/native/src/utils.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
fn collapse_whitespace(s: &str) -> String {
|
||||
let mut result = String::new();
|
||||
let mut prev_was_whitespace = false;
|
||||
for c in s.chars() {
|
||||
if c.is_whitespace() {
|
||||
if !prev_was_whitespace {
|
||||
result.push(' ');
|
||||
prev_was_whitespace = true;
|
||||
}
|
||||
} else {
|
||||
result.push(c);
|
||||
prev_was_whitespace = false;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn try_remove_label(s: &str, i: usize) -> Option<usize> {
|
||||
let mut next_idx = match s[i..].to_ascii_lowercase() {
|
||||
s if s.starts_with("figure") => i + 6,
|
||||
s if s.starts_with("table") => i + 5,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
if next_idx >= s.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(ch) = s[next_idx..].chars().next() {
|
||||
if !ch.is_whitespace() {
|
||||
return None;
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
|
||||
while next_idx < s.len() {
|
||||
let ch = s[next_idx..].chars().next()?;
|
||||
if ch.is_whitespace() {
|
||||
next_idx += ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let start_digits = next_idx;
|
||||
while next_idx < s.len() {
|
||||
let ch = s[next_idx..].chars().next()?;
|
||||
if ch.is_ascii_digit() {
|
||||
next_idx += ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if next_idx == start_digits {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(ch) = s[next_idx..].chars().next() {
|
||||
if ch == '.' {
|
||||
next_idx += ch.len_utf8();
|
||||
return Some(next_idx);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn remove_label(s: &str) -> String {
|
||||
let mut result = String::with_capacity(s.len());
|
||||
let mut i = 0;
|
||||
while i < s.len() {
|
||||
if let Some(next_idx) = try_remove_label(s, i) {
|
||||
i = next_idx;
|
||||
continue;
|
||||
}
|
||||
|
||||
let ch = s[i..].chars().next().unwrap();
|
||||
result.push(ch);
|
||||
i += ch.len_utf8();
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn clean_content(content: &str) -> String {
|
||||
let content = content.replace("\x00", "");
|
||||
remove_label(&collapse_whitespace(&content))
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_clean_input() {
|
||||
let inputs = [
|
||||
"FIGURE 1. This is a\t test\n\nwith multiple lines",
|
||||
"table 2. Another test\x00 with null",
|
||||
"Some text \t\n without label",
|
||||
];
|
||||
let cleaned = [
|
||||
"This is a test with multiple lines",
|
||||
"Another test with null",
|
||||
"Some text without label",
|
||||
];
|
||||
|
||||
assert_eq!(cleaned, inputs.map(clean_content));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user