Add caching for git repos and a max in-memory lines limit (MAX_IN_MEMORY_LINES) for text

This commit is contained in:
alexpasmantier 2024-10-22 14:50:35 +02:00
parent 9d6d1f47ba
commit 14357f2c96
2 changed files with 21 additions and 5 deletions

View File

@ -30,9 +30,9 @@ pub struct Channel {
running: bool,
icon: FileIcon,
crawl_handle: JoinHandle<()>,
entry_cache: Arc<Mutex<HashSet<String>>>,
git_dirs_cache: Arc<Mutex<HashSet<String>>>,
// TODO: implement cache validation/invalidation
cache_valid: bool,
cache_valid: Arc<Mutex<bool>>,
}
impl Channel {
@ -44,12 +44,14 @@ impl Channel {
1,
);
let entry_cache = Arc::new(Mutex::new(HashSet::new()));
let cache_valid = Arc::new(Mutex::new(false));
// start loading files in the background
// PERF: store the results somewhere in a cache
let crawl_handle = tokio::spawn(crawl_for_repos(
std::env::home_dir().expect("Could not get home directory"),
matcher.injector(),
entry_cache.clone(),
cache_valid.clone(),
));
Channel {
matcher,
@ -59,8 +61,8 @@ impl Channel {
running: false,
icon: FileIcon::from("git"),
crawl_handle,
entry_cache,
cache_valid: false,
git_dirs_cache: entry_cache,
cache_valid,
}
}
@ -157,6 +159,7 @@ async fn crawl_for_repos(
starting_point: std::path::PathBuf,
injector: nucleo::Injector<DirEntry>,
entry_cache: Arc<Mutex<HashSet<String>>>,
cache_valid: Arc<Mutex<bool>>,
) {
let mut walker_overrides_builder = OverrideBuilder::new(&starting_point);
walker_overrides_builder.add(".git").unwrap();
@ -199,4 +202,6 @@ async fn crawl_for_repos(
ignore::WalkState::Continue
})
});
*cache_valid.lock() = true;
}

View File

@ -7,7 +7,7 @@ use std::{
fs::File,
io::{BufRead, Read, Seek},
path::{Path, PathBuf},
sync::Arc,
sync::{atomic::AtomicUsize, Arc},
};
use tracing::{debug, info};
@ -172,16 +172,24 @@ impl OnAir for Channel {
/// a lot of files (e.g. starting tv in $HOME).
const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
const MAX_IN_MEMORY_LINES: usize = 5_000_000;
#[allow(clippy::unused_async)]
async fn load_candidates(path: PathBuf, injector: Injector<CandidateLine>) {
let current_dir = std::env::current_dir().unwrap();
let walker =
walk_builder(&path, *DEFAULT_NUM_THREADS, None).build_parallel();
let lines_in_mem = Arc::new(AtomicUsize::new(0));
walker.run(|| {
let injector = injector.clone();
let current_dir = current_dir.clone();
let lines_in_mem = lines_in_mem.clone();
Box::new(move |result| {
if lines_in_mem.load(std::sync::atomic::Ordering::Relaxed) > MAX_IN_MEMORY_LINES {
return ignore::WalkState::Quit;
}
if let Ok(entry) = result {
if entry.file_type().unwrap().is_file() {
if let Ok(m) = entry.metadata() {
@ -192,6 +200,7 @@ async fn load_candidates(path: PathBuf, injector: Injector<CandidateLine>) {
// iterate over the lines of the file
match File::open(entry.path()) {
Ok(file) => {
// is the file a text-based file?
let mut reader = std::io::BufReader::new(&file);
let mut buffer = [0u8; 128];
match reader.read(&mut buffer) {
@ -212,6 +221,7 @@ async fn load_candidates(path: PathBuf, injector: Injector<CandidateLine>) {
return ignore::WalkState::Continue;
}
}
// read the lines of the file
let mut line_number = 0;
for maybe_line in reader.lines() {
match maybe_line {
@ -238,6 +248,7 @@ async fn load_candidates(path: PathBuf, injector: Injector<CandidateLine>) {
c.line.clone().into();
},
);
lines_in_mem.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
Err(e) => {
info!("Error reading line: {:?}", e);