From 14357f2c96cf4dada922f25f8181177e108f0155 Mon Sep 17 00:00:00 2001 From: alexpasmantier Date: Tue, 22 Oct 2024 14:50:35 +0200 Subject: [PATCH] caching for git repos and max_lines_in_memory for text --- crates/television/channels/git_repos.rs | 13 +++++++++---- crates/television/channels/text.rs | 13 ++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/crates/television/channels/git_repos.rs b/crates/television/channels/git_repos.rs index 3d953b1..de59ee6 100644 --- a/crates/television/channels/git_repos.rs +++ b/crates/television/channels/git_repos.rs @@ -30,9 +30,9 @@ pub struct Channel { running: bool, icon: FileIcon, crawl_handle: JoinHandle<()>, - entry_cache: Arc>>, + git_dirs_cache: Arc>>, // TODO: implement cache validation/invalidation - cache_valid: bool, + cache_valid: Arc>, } impl Channel { @@ -44,12 +44,14 @@ impl Channel { 1, ); let entry_cache = Arc::new(Mutex::new(HashSet::new())); + let cache_valid = Arc::new(Mutex::new(false)); // start loading files in the background // PERF: store the results somewhere in a cache let crawl_handle = tokio::spawn(crawl_for_repos( std::env::home_dir().expect("Could not get home directory"), matcher.injector(), entry_cache.clone(), + cache_valid.clone(), )); Channel { matcher, @@ -59,8 +61,8 @@ impl Channel { running: false, icon: FileIcon::from("git"), crawl_handle, - entry_cache, - cache_valid: false, + git_dirs_cache: entry_cache, + cache_valid, } } @@ -157,6 +159,7 @@ async fn crawl_for_repos( starting_point: std::path::PathBuf, injector: nucleo::Injector, entry_cache: Arc>>, + cache_valid: Arc>, ) { let mut walker_overrides_builder = OverrideBuilder::new(&starting_point); walker_overrides_builder.add(".git").unwrap(); @@ -199,4 +202,6 @@ async fn crawl_for_repos( ignore::WalkState::Continue }) }); + + *cache_valid.lock() = true; } diff --git a/crates/television/channels/text.rs b/crates/television/channels/text.rs index 0ea8527..1c1ee88 100644 --- a/crates/television/channels/text.rs +++ b/crates/television/channels/text.rs @@ -7,7 +7,7 @@ use std::{ fs::File, io::{BufRead, Read, Seek}, path::{Path, PathBuf}, - sync::Arc, + sync::{atomic::AtomicUsize, Arc}, }; use tracing::{debug, info}; @@ -172,16 +172,24 @@ impl OnAir for Channel { /// a lot of files (e.g. starting tv in $HOME). const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024; +const MAX_IN_MEMORY_LINES: usize = 5_000_000; + #[allow(clippy::unused_async)] async fn load_candidates(path: PathBuf, injector: Injector) { let current_dir = std::env::current_dir().unwrap(); let walker = walk_builder(&path, *DEFAULT_NUM_THREADS, None).build_parallel(); + let lines_in_mem = Arc::new(AtomicUsize::new(0)); + walker.run(|| { let injector = injector.clone(); let current_dir = current_dir.clone(); + let lines_in_mem = lines_in_mem.clone(); Box::new(move |result| { + if lines_in_mem.load(std::sync::atomic::Ordering::Relaxed) > MAX_IN_MEMORY_LINES { + return ignore::WalkState::Quit; + } if let Ok(entry) = result { if entry.file_type().unwrap().is_file() { if let Ok(m) = entry.metadata() { @@ -192,6 +200,7 @@ async fn load_candidates(path: PathBuf, injector: Injector) { // iterate over the lines of the file match File::open(entry.path()) { Ok(file) => { + // is the file a text-based file? let mut reader = std::io::BufReader::new(&file); let mut buffer = [0u8; 128]; match reader.read(&mut buffer) { @@ -212,6 +221,7 @@ async fn load_candidates(path: PathBuf, injector: Injector) { return ignore::WalkState::Continue; } } + // read the lines of the file let mut line_number = 0; for maybe_line in reader.lines() { match maybe_line { @@ -238,6 +248,7 @@ async fn load_candidates(path: PathBuf, injector: Injector) { c.line.clone().into(); }, ); + lines_in_mem.fetch_add(1, std::sync::atomic::Ordering::Relaxed); } Err(e) => { info!("Error reading line: {:?}", e);