Infer file extension when decompressing

This commit is contained in:
Nbiba Bedis 2021-11-06 16:10:25 +01:00 committed by João M. Bezerra
parent abf1d4e3e4
commit 266cf6e27a
5 changed files with 163 additions and 5 deletions

View File

@ -4,6 +4,7 @@
use std::{
io::{self, BufReader, BufWriter, Read, Write},
ops::ControlFlow,
path::{Path, PathBuf},
};
@ -20,8 +21,11 @@ use crate::{
},
info,
list::{self, ListOptions},
utils::{self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf},
Opts, QuestionPolicy, Subcommand,
utils::{
self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer,
user_wants_to_continue_decompressing,
},
warning, Opts, QuestionPolicy, Subcommand,
};
// Used in BufReader and BufWriter to perform fewer syscalls
@ -170,6 +174,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
formats.push(file_formats);
}
if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
return Ok(());
}
let files_missing_format: Vec<PathBuf> = files
.iter()
.zip(&formats)
@ -208,6 +216,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
formats.push(file_formats);
}
if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
return Ok(());
}
let not_archives: Vec<PathBuf> = files
.iter()
.zip(&formats)
@ -473,3 +485,43 @@ fn list_archive_contents(
list::list_files(archive_path, files, list_options);
Ok(())
}
/// Cross-checks the extensions parsed from each file name against the format
/// sniffed from the file's contents by `try_infer`.
///
/// `files` and `formats` are walked in lockstep (`formats[i]` belongs to `files[i]`):
///
/// - no extensions, format detected: the detection is reported and the user is asked
///   whether to continue; on confirmation the detected format is pushed into the
///   (previously empty) entry.
/// - extensions present, format detected: the outermost extension is compared with the
///   detected one; on mismatch a warning is printed and the user is asked whether to
///   continue.
/// - extensions present, nothing detected: an informational message is printed.
/// - no extensions, nothing detected: the entry is left untouched.
///
/// Returns `ControlFlow::Break(())` as soon as the user declines to continue, and
/// `ControlFlow::Continue(())` once every file has been checked. Errors from the
/// confirmation prompt are propagated.
fn check_mime_type(
    files: &[PathBuf],
    formats: &mut Vec<Vec<Extension>>,
    question_policy: QuestionPolicy,
) -> crate::Result<ControlFlow<()>> {
    for (path, format) in files.iter().zip(formats.iter_mut()) {
        if format.is_empty() {
            // File with no extension
            // Try to detect it automatically and prompt the user about it
            if let Some(detected_format) = try_infer(path) {
                info!("Detected file: `{}` extension as `{}`", path.display(), detected_format);
                if !user_wants_to_continue_decompressing(path, question_policy)? {
                    return Ok(ControlFlow::Break(()));
                }
                format.push(detected_format);
            }
        } else if let Some(detected_format) = try_infer(path) {
            // File ending with extension
            // Try to detect the extension and warn the user if it differs from the written one
            //
            // The magic bytes at the start of a file describe its outermost layer, which
            // corresponds to the right-most extension of the file name, i.e. the last
            // entry of `format`.
            // NOTE(review): assumes extensions are stored in file-name order
            // (`a.tar.gz` -> [tar, gz]) - confirm against the extension parser.
            if let Some(outer_ext) = format.last() {
                if outer_ext != &detected_format {
                    warning!(
                        "The file extension: `{}` differ from the detected extension: `{}`",
                        outer_ext,
                        detected_format
                    );
                    if !user_wants_to_continue_decompressing(path, question_policy)? {
                        return Ok(ControlFlow::Break(()));
                    }
                }
            }
        } else {
            // NOTE: If this actually produces no false positives, we can upgrade it in the future
            // to a warning and ask the user if he wants to continue decompressing.
            info!("Could not detect the extension of `{}`", path.display());
        }
    }
    Ok(ControlFlow::Continue(()))
}

View File

@ -15,3 +15,19 @@ pub fn _info_helper() {
print!("{}[INFO]{} ", *YELLOW, *RESET);
}
/// Macro that prints \[WARNING\] messages, wraps [`println`].
///
/// Accepts the same arguments as [`println`]. The expansion is a single block
/// expression (the prefix helper call followed by the `println!`), so the macro
/// works both as a statement and in expression position, e.g. as a `match` arm.
#[macro_export]
macro_rules! warning {
    ($($arg:tt)*) => {{
        $crate::macros::_warning_helper();
        println!($($arg)*);
    }};
}
/// Helper to display "[INFO]", colored yellow
pub fn _warning_helper() {
use crate::utils::colors::{ORANGE, RESET};
print!("{}[WARNING]{} ", *ORANGE, *RESET);
}

View File

@ -9,7 +9,7 @@ use std::{
use fs_err as fs;
use crate::info;
use crate::{extension::Extension, info};
/// Checks whether the given path points to an empty directory.
pub fn dir_is_empty(dir_path: &Path) -> bool {
@ -80,6 +80,78 @@ pub fn nice_directory_display(os_str: impl AsRef<OsStr>) -> Cow<'static, str> {
}
}
/// Try to detect the file extension by looking for known magic strings
/// Source: https://en.wikipedia.org/wiki/List_of_file_signatures
///
/// Reads at most the first 270 bytes of `path` and matches them against the
/// signatures of the supported formats. Returns `None` when no signature matches;
/// failing to open or read the file is not reported and simply results in `None`
/// (or in a match on whatever bytes were read before the failure).
pub fn try_infer(path: &Path) -> Option<Extension> {
    use std::io::Read;

    use crate::extension::CompressionFormat::*;

    /// Number of leading bytes inspected: enough to cover the tar magic,
    /// which ends at offset 262.
    const HEADER_LEN: u64 = 270;

    fn is_zip(buf: &[u8]) -> bool {
        // "PK" followed by exactly one of the three valid marker pairs:
        // 03 04 (regular), 05 06 (empty archive) or 07 08 (spanned archive).
        buf.starts_with(b"PK\x03\x04") || buf.starts_with(b"PK\x05\x06") || buf.starts_with(b"PK\x07\x08")
    }
    fn is_tar(buf: &[u8]) -> bool {
        // "ustar" at offset 257; `get` yields `None` when the file is shorter than that
        buf.get(257..262).map_or(false, |magic| magic == b"ustar")
    }
    fn is_gz(buf: &[u8]) -> bool {
        buf.starts_with(&[0x1F, 0x8B, 0x08])
    }
    fn is_bz2(buf: &[u8]) -> bool {
        buf.starts_with(b"BZh")
    }
    fn is_xz(buf: &[u8]) -> bool {
        buf.starts_with(&[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])
    }
    fn is_lz(buf: &[u8]) -> bool {
        buf.starts_with(b"LZIP")
    }
    fn is_lz4(buf: &[u8]) -> bool {
        buf.starts_with(&[0x04, 0x22, 0x4D, 0x18])
    }
    fn is_zst(buf: &[u8]) -> bool {
        buf.starts_with(&[0x28, 0xB5, 0x2F, 0xFD])
    }

    // Only the bytes actually read end up in `header`, so the checks above never
    // see zero padding. `read_to_end` keeps reading until `HEADER_LEN` bytes or EOF,
    // which a single `read` call does not guarantee.
    let mut header = Vec::new();
    if let Ok(file) = std::fs::File::open(path) {
        // Reading errors will just make the inferring fail so it's safe to ignore them
        let _ = file.take(HEADER_LEN).read_to_end(&mut header);
    }

    if is_zip(&header) {
        Some(Extension::new(&[Zip], "zip"))
    } else if is_tar(&header) {
        Some(Extension::new(&[Tar], "tar"))
    } else if is_gz(&header) {
        Some(Extension::new(&[Gzip], "gz"))
    } else if is_bz2(&header) {
        Some(Extension::new(&[Bzip], "bz2"))
    } else if is_xz(&header) {
        Some(Extension::new(&[Lzma], "xz"))
    } else if is_lz(&header) {
        Some(Extension::new(&[Lzma], "lz"))
    } else if is_lz4(&header) {
        Some(Extension::new(&[Lz4], "lz4"))
    } else if is_zst(&header) {
        Some(Extension::new(&[Zstd], "zst"))
    } else {
        None
    }
}
/// Module with a list of bright colors.
#[allow(dead_code)]
pub mod colors {
@ -108,6 +180,8 @@ pub mod colors {
color!(RED = "\u{1b}[38;5;9m");
color!(WHITE = "\u{1b}[38;5;15m");
color!(YELLOW = "\u{1b}[38;5;11m");
// Requires true color support
color!(ORANGE = "\u{1b}[38;2;255;165;0m");
color!(STYLE_BOLD = "\u{1b}[1m");
color!(STYLE_RESET = "\u{1b}[0m");
color!(ALL_RESET = "\u{1b}[0;39m");

View File

@ -7,6 +7,8 @@ mod question_policy;
pub use bytes::Bytes;
pub use fs::{
cd_into_same_dir_as, colors, concatenate_list_of_os_str, create_dir_if_non_existent, dir_is_empty,
nice_directory_display, strip_cur_dir, to_utf,
nice_directory_display, strip_cur_dir, to_utf, try_infer,
};
pub use question_policy::{
create_or_ask_overwrite, user_wants_to_continue_decompressing, user_wants_to_overwrite, QuestionPolicy,
};
pub use question_policy::{create_or_ask_overwrite, user_wants_to_overwrite, QuestionPolicy};

View File

@ -53,3 +53,17 @@ pub fn create_or_ask_overwrite(path: &Path, question_policy: QuestionPolicy) ->
Err(e) => Err(Error::from(e)),
}
}
/// Decides whether decompression of `path` should go ahead.
///
/// `QuestionPolicy::AlwaysYes` and `QuestionPolicy::AlwaysNo` answer immediately with
/// `true` / `false`; with `QuestionPolicy::Ask` the user is prompted and the result of
/// the confirmation is returned.
pub fn user_wants_to_continue_decompressing(path: &Path, question_policy: QuestionPolicy) -> crate::Result<bool> {
    // Non-interactive policies short-circuit; only `Ask` needs the displayable path.
    let shown_path = match question_policy {
        QuestionPolicy::AlwaysYes => return Ok(true),
        QuestionPolicy::AlwaysNo => return Ok(false),
        QuestionPolicy::Ask => to_utf(strip_cur_dir(path)),
    };

    // 'FILE' in the prompt is the placeholder that gets substituted with the path.
    Confirmation::new("Do you want to continue decompressing 'FILE'?", Some("FILE")).ask(Some(shown_path.as_str()))
}