build(infer): drop infer dependency and refactor code to a simpler heuristic (#58)

* build(infer): drop infer dependency and refactor code to a simpler heuristic

* Update changelog

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Alexandre Pasmantier 2024-11-23 00:37:39 +01:00 committed by GitHub
parent b757305d7a
commit f0e1115bab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 74 additions and 150 deletions

View File

@ -10,7 +10,7 @@ All notable changes to this project will be documented in this file.
### 🚜 Refactor ### 🚜 Refactor
- *(picker)* Refactor picker logic and add tests for picker, cli and events - *(picker)* Refactor picker logic and add tests to picker, cli, and events (#57)
### 📚 Documentation ### 📚 Documentation
@ -20,6 +20,10 @@ All notable changes to this project will be documented in this file.
- Add readme version update to github actions (#55) - Add readme version update to github actions (#55)
### Build
- *(infer)* Drop infer dependency and refactor code to a simpler heuristic
## [0.5.1] - 2024-11-20 ## [0.5.1] - 2024-11-20
### 📚 Documentation ### 📚 Documentation

28
Cargo.lock generated
View File

@ -264,12 +264,6 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.8.0" version = "1.8.0"
@ -364,17 +358,6 @@ dependencies = [
"shlex", "shlex",
] ]
[[package]]
name = "cfb"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f"
dependencies = [
"byteorder",
"fnv",
"uuid",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -1761,15 +1744,6 @@ version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "infer"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc150e5ce2330295b8616ce0e3f53250e53af31759a9dbedad1621ba29151847"
dependencies = [
"cfb",
]
[[package]] [[package]]
name = "instability" name = "instability"
version = "0.3.2" version = "0.3.2"
@ -2889,7 +2863,6 @@ version = "0.0.5"
dependencies = [ dependencies = [
"color-eyre", "color-eyre",
"devicons", "devicons",
"infer",
"parking_lot", "parking_lot",
"syntect", "syntect",
"television-channels", "television-channels",
@ -2908,7 +2881,6 @@ dependencies = [
"directories", "directories",
"gag", "gag",
"ignore", "ignore",
"infer",
"lazy_static", "lazy_static",
"syntect", "syntect",
"tracing", "tracing",

View File

@ -9,9 +9,7 @@ use std::{
sync::{atomic::AtomicUsize, Arc}, sync::{atomic::AtomicUsize, Arc},
}; };
use television_fuzzy::matcher::{config::Config, injector::Injector, Matcher}; use television_fuzzy::matcher::{config::Config, injector::Injector, Matcher};
use television_utils::files::{ use television_utils::files::{walk_builder, DEFAULT_NUM_THREADS};
is_not_text, walk_builder, DEFAULT_NUM_THREADS,
};
use television_utils::strings::{ use television_utils::strings::{
preprocess_line, proportion_of_printable_ascii_characters, preprocess_line, proportion_of_printable_ascii_characters,
PRINTABLE_ASCII_THRESHOLD, PRINTABLE_ASCII_THRESHOLD,
@ -298,7 +296,6 @@ fn try_inject_lines(
match reader.read(&mut buffer) { match reader.read(&mut buffer) {
Ok(bytes_read) => { Ok(bytes_read) => {
if (bytes_read == 0) if (bytes_read == 0)
|| is_not_text(&buffer).unwrap_or(false)
|| proportion_of_printable_ascii_characters(&buffer) || proportion_of_printable_ascii_characters(&buffer)
< PRINTABLE_ASCII_THRESHOLD < PRINTABLE_ASCII_THRESHOLD
{ {

View File

@ -22,5 +22,4 @@ tokio = "1.41.1"
termtree = "0.5.1" termtree = "0.5.1"
devicons = "0.6.11" devicons = "0.6.11"
color-eyre = "0.6.3" color-eyre = "0.6.3"
infer = "0.16.0"

View File

@ -3,8 +3,8 @@ use color_eyre::Result;
//use ratatui_image::picker::Picker; //use ratatui_image::picker::Picker;
use parking_lot::Mutex; use parking_lot::Mutex;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek}; use std::io::{BufRead, BufReader, Seek};
use std::path::{Path, PathBuf}; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use syntect::{ use syntect::{
@ -16,12 +16,9 @@ use tracing::{debug, warn};
use super::cache::PreviewCache; use super::cache::PreviewCache;
use crate::previewers::{meta, Preview, PreviewContent}; use crate::previewers::{meta, Preview, PreviewContent};
use television_channels::entry; use television_channels::entry;
use television_utils::files::get_file_size;
use television_utils::files::FileType; use television_utils::files::FileType;
use television_utils::files::{get_file_size, is_known_text_extension}; use television_utils::strings::preprocess_line;
use television_utils::strings::{
preprocess_line, proportion_of_printable_ascii_characters,
PRINTABLE_ASCII_THRESHOLD,
};
use television_utils::syntax::{ use television_utils::syntax::{
self, load_highlighting_assets, HighlightingAssetsExt, self, load_highlighting_assets, HighlightingAssetsExt,
}; };
@ -69,6 +66,10 @@ impl FilePreviewer {
} }
} }
/// The maximum file size that we will try to preview.
/// 4 MB
const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
/// Get a preview for a file entry. /// Get a preview for a file entry.
/// ///
/// # Panics /// # Panics
@ -93,53 +94,32 @@ impl FilePreviewer {
// try to determine file type // try to determine file type
debug!("Computing preview for {:?}", entry.name); debug!("Computing preview for {:?}", entry.name);
match self.get_file_type(&path_buf) { if let FileType::Text = FileType::from(&path_buf) {
FileType::Text => { debug!("File is text-based: {:?}", entry.name);
match File::open(&path_buf) { match File::open(&path_buf) {
Ok(file) => { Ok(file) => {
// insert a loading preview into the cache // insert a loading preview into the cache
let preview = meta::loading(&entry.name); let preview = meta::loading(&entry.name);
self.cache_preview( self.cache_preview(entry.name.clone(), preview.clone());
entry.name.clone(),
preview.clone(),
);
// compute the highlighted version in the background // compute the highlighted version in the background
let mut reader = BufReader::new(file); let mut reader = BufReader::new(file);
reader.seek(std::io::SeekFrom::Start(0)).unwrap(); reader.seek(std::io::SeekFrom::Start(0)).unwrap();
self.compute_highlighted_text_preview(entry, reader); self.compute_highlighted_text_preview(entry, reader);
preview preview
} }
Err(e) => { Err(e) => {
warn!("Error opening file: {:?}", e); warn!("Error opening file: {:?}", e);
let p = meta::not_supported(&entry.name); let p = meta::not_supported(&entry.name);
self.cache_preview(entry.name.clone(), p.clone()); self.cache_preview(entry.name.clone(), p.clone());
p p
}
} }
} }
FileType::Image => { } else {
debug!("Previewing image file: {:?}", entry.name); debug!("File isn't text-based: {:?}", entry.name);
// insert a loading preview into the cache let preview = meta::not_supported(&entry.name);
//let preview = loading(&entry.name); self.cache_preview(entry.name.clone(), preview.clone());
let preview = meta::not_supported(&entry.name); preview
self.cache_preview(entry.name.clone(), preview.clone());
//// compute the image preview in the background
//self.compute_image_preview(entry).await;
preview
}
FileType::Other => {
debug!("Previewing other file: {:?}", entry.name);
let preview = meta::not_supported(&entry.name);
self.cache_preview(entry.name.clone(), preview.clone());
preview
}
FileType::Unknown => {
debug!("Unknown file type: {:?}", entry.name);
let preview = meta::not_supported(&entry.name);
self.cache_preview(entry.name.clone(), preview.clone());
preview
}
} }
} }
@ -218,48 +198,6 @@ impl FilePreviewer {
}); });
} }
/// The maximum file size that we will try to preview.
/// 4 MB
const MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
fn get_file_type(&self, path: &Path) -> FileType {
debug!("Getting file type for {:?}", path);
let mut file_type = match infer::get_from_path(path) {
Ok(Some(t)) => {
let mime_type = t.mime_type();
if mime_type.contains("image") {
FileType::Image
} else if mime_type.contains("text") {
FileType::Text
} else {
FileType::Other
}
}
_ => FileType::Unknown,
};
// if the file type is unknown, try to determine it from the extension or the content
if matches!(file_type, FileType::Unknown) {
if is_known_text_extension(path) {
file_type = FileType::Text;
} else if let Ok(mut f) = File::open(path) {
let mut buffer = [0u8; 256];
if let Ok(bytes_read) = f.read(&mut buffer) {
if bytes_read > 0
&& proportion_of_printable_ascii_characters(
&buffer[..bytes_read],
) > PRINTABLE_ASCII_THRESHOLD
{
file_type = FileType::Text;
}
}
}
}
debug!("File type for {:?}: {:?}", path, file_type);
file_type
}
fn cache_preview(&mut self, key: String, preview: Arc<Preview>) { fn cache_preview(&mut self, key: String, preview: Arc<Preview>) {
self.cache.lock().insert(key, preview); self.cache.lock().insert(key, preview);
} }

View File

@ -14,7 +14,6 @@ rust-version.workspace = true
[dependencies] [dependencies]
ignore = "0.4.23" ignore = "0.4.23"
infer = "0.16.0"
lazy_static = "1.5.0" lazy_static = "1.5.0"
tracing = "0.1.40" tracing = "0.1.40"
color-eyre = "0.6.3" color-eyre = "0.6.3"

View File

@ -1,11 +1,16 @@
use std::fmt::Debug;
use std::fs::File;
use std::io::Read;
use std::path::Path; use std::path::Path;
use std::{collections::HashSet, path::PathBuf}; use std::{collections::HashSet, path::PathBuf};
use ignore::{overrides::Override, types::TypesBuilder, WalkBuilder}; use ignore::{overrides::Override, types::TypesBuilder, WalkBuilder};
use infer::Infer;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use tracing::debug; use tracing::{debug, warn};
use crate::strings::{
proportion_of_printable_ascii_characters, PRINTABLE_ASCII_THRESHOLD,
};
use crate::threads::default_num_threads; use crate::threads::default_num_threads;
lazy_static::lazy_static! { lazy_static::lazy_static! {
@ -51,34 +56,44 @@ pub fn get_file_size(path: &Path) -> Option<u64> {
#[derive(Debug)] #[derive(Debug)]
pub enum FileType { pub enum FileType {
Text, Text,
Image,
Other, Other,
Unknown, Unknown,
} }
pub fn is_not_text(bytes: &[u8]) -> Option<bool> { impl<P> From<P> for FileType
let infer = Infer::new(); where
match infer.get(bytes) { P: AsRef<Path> + Debug,
Some(t) => { {
let mime_type = t.mime_type(); fn from(path: P) -> Self {
if mime_type.contains("image") debug!("Getting file type for {:?}", path);
|| mime_type.contains("video") let p = path.as_ref();
|| mime_type.contains("audio") if is_known_text_extension(p) {
|| mime_type.contains("archive") return FileType::Text;
|| mime_type.contains("book")
|| mime_type.contains("font")
{
Some(true)
} else {
None
}
} }
None => None, if let Ok(mut f) = File::open(p) {
let mut buffer = [0u8; 256];
if let Ok(bytes_read) = f.read(&mut buffer) {
if bytes_read > 0
&& proportion_of_printable_ascii_characters(
&buffer[..bytes_read],
) > PRINTABLE_ASCII_THRESHOLD
{
return FileType::Text;
}
}
} else {
warn!("Error opening file: {:?}", path);
}
FileType::Other
} }
} }
pub fn is_known_text_extension(path: &Path) -> bool { pub fn is_known_text_extension<P>(path: P) -> bool
path.extension() where
P: AsRef<Path>,
{
path.as_ref()
.extension()
.and_then(|ext| ext.to_str()) .and_then(|ext| ext.to_str())
.is_some_and(|ext| KNOWN_TEXT_FILE_EXTENSIONS.contains(ext)) .is_some_and(|ext| KNOWN_TEXT_FILE_EXTENSIONS.contains(ext))
} }