mirror of
https://github.com/alexpasmantier/television.git
synced 2025-06-06 11:35:25 +00:00
a more sensible method to detect text files
This commit is contained in:
parent
49a3948b51
commit
d2213af480
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -2153,6 +2153,15 @@ version = "0.1.24"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc_version"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
|
||||||
|
dependencies = [
|
||||||
|
"semver",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustix"
|
name = "rustix"
|
||||||
version = "0.38.37"
|
version = "0.38.37"
|
||||||
@ -2419,7 +2428,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "television"
|
name = "television"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"better-panic",
|
"better-panic",
|
||||||
@ -2832,6 +2841,7 @@ dependencies = [
|
|||||||
"cargo_metadata",
|
"cargo_metadata",
|
||||||
"derive_builder",
|
"derive_builder",
|
||||||
"regex",
|
"regex",
|
||||||
|
"rustc_version",
|
||||||
"rustversion",
|
"rustversion",
|
||||||
"time",
|
"time",
|
||||||
"vergen-lib",
|
"vergen-lib",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "television"
|
name = "television"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "The revolution will be televised."
|
description = "The revolution will be televised."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@ -72,7 +72,7 @@ pretty_assertions = "1.4.1"
|
|||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
vergen-gix = { version = "1.0.0", features = ["build", "cargo"] }
|
vergen-gix = { version = "1.0.0", features = ["build", "cargo", "rustc"] }
|
||||||
|
|
||||||
|
|
||||||
[profile.staging]
|
[profile.staging]
|
||||||
|
12
build.rs
12
build.rs
@ -1,13 +1,15 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use vergen_gix::{BuildBuilder, CargoBuilder, Emitter, GixBuilder};
|
use vergen_gix::{
|
||||||
|
BuildBuilder, CargoBuilder, Emitter, GixBuilder, RustcBuilder,
|
||||||
|
};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let build = BuildBuilder::all_build()?;
|
let build = BuildBuilder::default().build_date(true).build()?;
|
||||||
let gix = GixBuilder::all_git()?;
|
let cargo = CargoBuilder::default().target_triple(true).build()?;
|
||||||
let cargo = CargoBuilder::all_cargo()?;
|
let rustc = RustcBuilder::default().semver(true).build()?;
|
||||||
Emitter::default()
|
Emitter::default()
|
||||||
.add_instructions(&build)?
|
.add_instructions(&build)?
|
||||||
.add_instructions(&gix)?
|
|
||||||
.add_instructions(&cargo)?
|
.add_instructions(&cargo)?
|
||||||
|
.add_instructions(&rustc)?
|
||||||
.emit()
|
.emit()
|
||||||
}
|
}
|
||||||
|
@ -3,15 +3,17 @@ use nucleo::{
|
|||||||
pattern::{CaseMatching, Normalization},
|
pattern::{CaseMatching, Normalization},
|
||||||
Config, Injector, Nucleo,
|
Config, Injector, Nucleo,
|
||||||
};
|
};
|
||||||
use std::{path::PathBuf, sync::Arc};
|
use std::{os::unix::ffi::OsStrExt, path::PathBuf, sync::Arc};
|
||||||
|
|
||||||
use ignore::DirEntry;
|
use ignore::DirEntry;
|
||||||
|
|
||||||
use super::TelevisionChannel;
|
use super::TelevisionChannel;
|
||||||
use crate::entry::Entry;
|
|
||||||
use crate::fuzzy::MATCHER;
|
|
||||||
use crate::previewers::PreviewType;
|
use crate::previewers::PreviewType;
|
||||||
use crate::utils::files::{walk_builder, DEFAULT_NUM_THREADS};
|
use crate::utils::files::{walk_builder, DEFAULT_NUM_THREADS};
|
||||||
|
use crate::{
|
||||||
|
entry::Entry, utils::strings::proportion_of_printable_ascii_characters,
|
||||||
|
};
|
||||||
|
use crate::{fuzzy::MATCHER, utils::strings::PRINTABLE_ASCII_THRESHOLD};
|
||||||
|
|
||||||
pub(crate) struct Channel {
|
pub(crate) struct Channel {
|
||||||
matcher: Nucleo<DirEntry>,
|
matcher: Nucleo<DirEntry>,
|
||||||
@ -19,6 +21,8 @@ pub(crate) struct Channel {
|
|||||||
result_count: u32,
|
result_count: u32,
|
||||||
total_count: u32,
|
total_count: u32,
|
||||||
running: bool,
|
running: bool,
|
||||||
|
// TODO: cache results (to make deleting characters smoother) but like
|
||||||
|
// a shallow cache (maybe more like a stack actually? so we just pop result sets)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Channel {
|
impl Channel {
|
||||||
@ -131,6 +135,13 @@ async fn load_files(path: PathBuf, injector: Injector<DirEntry>) {
|
|||||||
if let Ok(entry) = result {
|
if let Ok(entry) = result {
|
||||||
if entry.file_type().unwrap().is_file() {
|
if entry.file_type().unwrap().is_file() {
|
||||||
// Send the path via the async channel
|
// Send the path via the async channel
|
||||||
|
let file_name = entry.file_name();
|
||||||
|
if proportion_of_printable_ascii_characters(
|
||||||
|
file_name.as_bytes(),
|
||||||
|
) < PRINTABLE_ASCII_THRESHOLD
|
||||||
|
{
|
||||||
|
return ignore::WalkState::Continue;
|
||||||
|
}
|
||||||
let _ = injector.push(entry, |e, cols| {
|
let _ = injector.push(entry, |e, cols| {
|
||||||
cols[0] = e
|
cols[0] = e
|
||||||
.path()
|
.path()
|
||||||
|
@ -13,13 +13,15 @@ use std::{
|
|||||||
use tracing::{debug, info};
|
use tracing::{debug, info};
|
||||||
|
|
||||||
use super::TelevisionChannel;
|
use super::TelevisionChannel;
|
||||||
use crate::entry::Entry;
|
|
||||||
use crate::fuzzy::MATCHER;
|
|
||||||
use crate::previewers::PreviewType;
|
use crate::previewers::PreviewType;
|
||||||
use crate::utils::{
|
use crate::utils::{
|
||||||
files::{is_not_text, is_valid_utf8, walk_builder, DEFAULT_NUM_THREADS},
|
files::{is_not_text, is_valid_utf8, walk_builder, DEFAULT_NUM_THREADS},
|
||||||
strings::preprocess_line,
|
strings::preprocess_line,
|
||||||
};
|
};
|
||||||
|
use crate::{
|
||||||
|
entry::Entry, utils::strings::proportion_of_printable_ascii_characters,
|
||||||
|
};
|
||||||
|
use crate::{fuzzy::MATCHER, utils::strings::PRINTABLE_ASCII_THRESHOLD};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct CandidateLine {
|
struct CandidateLine {
|
||||||
@ -184,7 +186,8 @@ async fn load_candidates(path: PathBuf, injector: Injector<CandidateLine>) {
|
|||||||
if (bytes_read == 0)
|
if (bytes_read == 0)
|
||||||
|| is_not_text(&buffer)
|
|| is_not_text(&buffer)
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
|| !is_valid_utf8(&buffer)
|
|| proportion_of_printable_ascii_characters(&buffer)
|
||||||
|
< PRINTABLE_ASCII_THRESHOLD
|
||||||
{
|
{
|
||||||
return ignore::WalkState::Continue;
|
return ignore::WalkState::Continue;
|
||||||
}
|
}
|
||||||
|
@ -21,8 +21,10 @@ pub(crate) struct Cli {
|
|||||||
|
|
||||||
const VERSION_MESSAGE: &str = concat!(
|
const VERSION_MESSAGE: &str = concat!(
|
||||||
env!("CARGO_PKG_VERSION"),
|
env!("CARGO_PKG_VERSION"),
|
||||||
"-",
|
"\ntarget triple: ",
|
||||||
env!("VERGEN_GIT_DESCRIBE"),
|
env!("VERGEN_CARGO_TARGET_TRIPLE"),
|
||||||
|
"\nbuild: ",
|
||||||
|
env!("VERGEN_RUSTC_SEMVER"),
|
||||||
" (",
|
" (",
|
||||||
env!("VERGEN_BUILD_DATE"),
|
env!("VERGEN_BUILD_DATE"),
|
||||||
")"
|
")"
|
||||||
|
@ -17,11 +17,11 @@ use tracing::{debug, warn};
|
|||||||
|
|
||||||
use crate::entry;
|
use crate::entry;
|
||||||
use crate::previewers::{Preview, PreviewContent};
|
use crate::previewers::{Preview, PreviewContent};
|
||||||
use crate::utils::files::is_valid_utf8;
|
|
||||||
use crate::utils::files::FileType;
|
use crate::utils::files::FileType;
|
||||||
use crate::utils::files::{get_file_size, is_known_text_extension};
|
use crate::utils::files::{get_file_size, is_known_text_extension};
|
||||||
use crate::utils::strings::{
|
use crate::utils::strings::{
|
||||||
preprocess_line, proportion_of_printable_ascii_characters,
|
preprocess_line, proportion_of_printable_ascii_characters,
|
||||||
|
PRINTABLE_ASCII_THRESHOLD,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::cache::PreviewCache;
|
use super::cache::PreviewCache;
|
||||||
@ -105,7 +105,8 @@ impl FilePreviewer {
|
|||||||
FileType::Image => {
|
FileType::Image => {
|
||||||
debug!("Previewing image file: {:?}", entry.name);
|
debug!("Previewing image file: {:?}", entry.name);
|
||||||
// insert a loading preview into the cache
|
// insert a loading preview into the cache
|
||||||
let preview = loading(&entry.name);
|
//let preview = loading(&entry.name);
|
||||||
|
let preview = not_supported(&entry.name);
|
||||||
self.cache_preview(entry.name.clone(), preview.clone())
|
self.cache_preview(entry.name.clone(), preview.clone())
|
||||||
.await;
|
.await;
|
||||||
//// compute the image preview in the background
|
//// compute the image preview in the background
|
||||||
@ -199,9 +200,6 @@ impl FilePreviewer {
|
|||||||
/// 4 MB
|
/// 4 MB
|
||||||
const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
|
const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
|
||||||
|
|
||||||
/// The proportion of printable ascii characters that a file must have to be considered text.
|
|
||||||
const PRINTABLE_ASCII_THRESHOLD: f32 = 0.9;
|
|
||||||
|
|
||||||
fn get_file_type(&self, path: &Path) -> FileType {
|
fn get_file_type(&self, path: &Path) -> FileType {
|
||||||
debug!("Getting file type for {:?}", path);
|
debug!("Getting file type for {:?}", path);
|
||||||
let mut file_type = match infer::get_from_path(path) {
|
let mut file_type = match infer::get_from_path(path) {
|
||||||
@ -225,12 +223,9 @@ impl FilePreviewer {
|
|||||||
} else if let Ok(mut f) = File::open(path) {
|
} else if let Ok(mut f) = File::open(path) {
|
||||||
let mut buffer = [0u8; 256];
|
let mut buffer = [0u8; 256];
|
||||||
if let Ok(bytes_read) = f.read(&mut buffer) {
|
if let Ok(bytes_read) = f.read(&mut buffer) {
|
||||||
// TODO: add a check for the proportion of non printable characters (binary
|
|
||||||
// files)
|
|
||||||
if bytes_read > 0
|
if bytes_read > 0
|
||||||
&& is_valid_utf8(&buffer)
|
|
||||||
&& proportion_of_printable_ascii_characters(&buffer)
|
&& proportion_of_printable_ascii_characters(&buffer)
|
||||||
> Self::PRINTABLE_ASCII_THRESHOLD
|
> PRINTABLE_ASCII_THRESHOLD
|
||||||
{
|
{
|
||||||
file_type = FileType::Text;
|
file_type = FileType::Text;
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,7 @@ lazy_static! {
|
|||||||
|
|
||||||
pub const EMPTY_STRING: &str = "";
|
pub const EMPTY_STRING: &str = "";
|
||||||
pub const FOUR_SPACES: &str = " ";
|
pub const FOUR_SPACES: &str = " ";
|
||||||
|
pub const TAB_WIDTH: usize = 4;
|
||||||
|
|
||||||
const SPACE_CHARACTER: char = ' ';
|
const SPACE_CHARACTER: char = ' ';
|
||||||
const TAB_CHARACTER: char = '\t';
|
const TAB_CHARACTER: char = '\t';
|
||||||
@ -108,6 +109,12 @@ pub(crate) fn replace_nonprintable(input: &[u8], tab_width: usize) -> String {
|
|||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The threshold for considering a buffer to be printable ASCII.
|
||||||
|
///
|
||||||
|
/// This is used to determine whether a file is likely to be a text file
|
||||||
|
/// based on a sample of its contents.
|
||||||
|
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;
|
||||||
|
|
||||||
pub(crate) fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
|
pub(crate) fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
|
||||||
let mut printable = 0;
|
let mut printable = 0;
|
||||||
for &byte in buffer {
|
for &byte in buffer {
|
||||||
@ -131,7 +138,7 @@ pub(crate) fn preprocess_line(line: &str) -> String {
|
|||||||
}
|
}
|
||||||
.trim_end_matches(['\r', '\n', '\0'])
|
.trim_end_matches(['\r', '\n', '\0'])
|
||||||
.as_bytes(),
|
.as_bytes(),
|
||||||
2,
|
TAB_WIDTH,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user