fix(channels): use the number of actual bytes read and not the sample buffer size when calculating the proportion of printable ASCII characters (#174)

Fixes #173
This commit is contained in:
Alex Pasmantier 2024-12-30 00:22:04 +01:00 committed by GitHub
parent e9c3ebf05f
commit 08fa41b06c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -13,7 +13,7 @@ use television_utils::files::{walk_builder, DEFAULT_NUM_THREADS};
use television_utils::strings::{ use television_utils::strings::{
proportion_of_printable_ascii_characters, PRINTABLE_ASCII_THRESHOLD, proportion_of_printable_ascii_characters, PRINTABLE_ASCII_THRESHOLD,
}; };
use tracing::{debug, warn}; use tracing::{debug, trace, warn};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct CandidateLine { struct CandidateLine {
@@ -294,15 +294,18 @@ fn try_inject_lines(
let mut buffer = [0u8; 128]; let mut buffer = [0u8; 128];
match reader.read(&mut buffer) { match reader.read(&mut buffer) {
Ok(bytes_read) => { Ok(bytes_read) => {
if (bytes_read == 0) if bytes_read == 0
|| proportion_of_printable_ascii_characters(&buffer) || proportion_of_printable_ascii_characters(
< PRINTABLE_ASCII_THRESHOLD &buffer[..bytes_read],
) < PRINTABLE_ASCII_THRESHOLD
{ {
debug!("Skipping non-text file {:?}", path);
return None; return None;
} }
reader.seek(std::io::SeekFrom::Start(0)).unwrap(); reader.seek(std::io::SeekFrom::Start(0)).unwrap();
} }
Err(_) => { Err(e) => {
warn!("Error reading file {:?}: {:?}", path, e);
return None; return None;
} }
} }
@@ -314,7 +317,7 @@ fn try_inject_lines(
Ok(l) => { Ok(l) => {
line_number += 1; line_number += 1;
if l.is_empty() { if l.is_empty() {
debug!("Empty line"); trace!("Empty line");
continue; continue;
} }
let candidate = CandidateLine::new( let candidate = CandidateLine::new(