From 266cf6e27ae318f143e738b4091a7ac8b4a60ea1 Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Sat, 6 Nov 2021 16:10:25 +0100 Subject: [PATCH 1/5] Infer file extension when decompressing --- src/commands.rs | 56 +++++++++++++++++++++++++- src/macros.rs | 16 ++++++++ src/utils/fs.rs | 76 +++++++++++++++++++++++++++++++++++- src/utils/mod.rs | 6 ++- src/utils/question_policy.rs | 14 +++++++ 5 files changed, 163 insertions(+), 5 deletions(-) diff --git a/src/commands.rs b/src/commands.rs index 06f0736..f4bc513 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -4,6 +4,7 @@ use std::{ io::{self, BufReader, BufWriter, Read, Write}, + ops::ControlFlow, path::{Path, PathBuf}, }; @@ -20,8 +21,11 @@ use crate::{ }, info, list::{self, ListOptions}, - utils::{self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf}, - Opts, QuestionPolicy, Subcommand, + utils::{ + self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer, + user_wants_to_continue_decompressing, + }, + warning, Opts, QuestionPolicy, Subcommand, }; // Used in BufReader and BufWriter to perform less syscalls @@ -170,6 +174,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> { formats.push(file_formats); } + if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? { + return Ok(()); + } + let files_missing_format: Vec = files .iter() .zip(&formats) @@ -208,6 +216,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> { formats.push(file_formats); } + if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? { + return Ok(()); + } + let not_archives: Vec = files .iter() .zip(&formats) @@ -473,3 +485,43 @@ fn list_archive_contents( list::list_files(archive_path, files, list_options); Ok(()) } + +fn check_mime_type( + files: &[PathBuf], + formats: &mut Vec>, + question_policy: QuestionPolicy, +) -> crate::Result> { + for (path, format) in files.iter().zip(formats.iter_mut()) { + if format.is_empty() { + // File with no extension + // Try to detect it automatically and prompt the user about it + if let Some(detected_format) = try_infer(path) { + info!("Detected file: `{}` extension as `{}`", path.display(), detected_format); + if user_wants_to_continue_decompressing(path, question_policy)? { + format.push(detected_format); + } else { + return Ok(ControlFlow::Break(())); + } + } + } else if let Some(detected_format) = try_infer(path) { + // File ending with extension + // Try to detect the extension and warn the user if it differs from the written one + let outer_ext = format.iter().next().unwrap(); + if outer_ext != &detected_format { + warning!( + "The file extension: `{}` differ from the detected extension: `{}`", + outer_ext, + detected_format + ); + if !user_wants_to_continue_decompressing(path, question_policy)? { + return Ok(ControlFlow::Break(())); + } + } + } else { + // NOTE: If this actually produces no false positives, we can upgrade it in the future + // to a warning and ask the user if he wants to continue decompressing. + info!("Could not detect the extension of `{}`", path.display()); + } + } + Ok(ControlFlow::Continue(())) +} diff --git a/src/macros.rs b/src/macros.rs index 1057120..349d7ea 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -15,3 +15,19 @@ pub fn _info_helper() { print!("{}[INFO]{} ", *YELLOW, *RESET); } + +/// Macro that prints [WARNING] messages, wraps [`println`]. +#[macro_export] +macro_rules! warning { + ($($arg:tt)*) => { + $crate::macros::_warning_helper(); + println!($($arg)*); + }; +} + +/// Helper to display "[INFO]", colored yellow +pub fn _warning_helper() { + use crate::utils::colors::{ORANGE, RESET}; + + print!("{}[WARNING]{} ", *ORANGE, *RESET); +} diff --git a/src/utils/fs.rs b/src/utils/fs.rs index 00bfcdd..11332a9 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -9,7 +9,7 @@ use std::{ use fs_err as fs; -use crate::info; +use crate::{extension::Extension, info}; /// Checks given path points to an empty directory. pub fn dir_is_empty(dir_path: &Path) -> bool { @@ -80,6 +80,78 @@ pub fn nice_directory_display(os_str: impl AsRef) -> Cow<'static, str> { } } +/// Try to detect the file extension by looking for known magic strings +/// Source: https://en.wikipedia.org/wiki/List_of_file_signatures +pub fn try_infer(path: &Path) -> Option { + fn is_zip(buf: &[u8]) -> bool { + buf.len() > 3 + && buf[0] == 0x50 + && buf[1] == 0x4B + && (buf[2] == 0x3 || buf[2] == 0x5 || buf[2] == 0x7) + && (buf[3] == 0x4 || buf[3] == 0x6 || buf[3] == 0x8) + } + fn is_tar(buf: &[u8]) -> bool { + buf.len() > 261 + && buf[257] == 0x75 + && buf[258] == 0x73 + && buf[259] == 0x74 + && buf[260] == 0x61 + && buf[261] == 0x72 + } + fn is_gz(buf: &[u8]) -> bool { + buf.len() > 2 && buf[0] == 0x1F && buf[1] == 0x8B && buf[2] == 0x8 + } + fn is_bz2(buf: &[u8]) -> bool { + buf.len() > 2 && buf[0] == 0x42 && buf[1] == 0x5A && buf[2] == 0x68 + } + fn is_xz(buf: &[u8]) -> bool { + buf.len() > 5 + && buf[0] == 0xFD + && buf[1] == 0x37 + && buf[2] == 0x7A + && buf[3] == 0x58 + && buf[4] == 0x5A + && buf[5] == 0x00 + } + fn is_lz(buf: &[u8]) -> bool { + buf.len() > 3 && buf[0] == 0x4C && buf[1] == 0x5A && buf[2] == 0x49 && buf[3] == 0x50 + } + fn is_lz4(buf: &[u8]) -> bool { + buf.len() > 3 && buf[0] == 0x04 && buf[1] == 0x22 && buf[2] == 0x4D && buf[3] == 0x18 + } + fn is_zst(buf: &[u8]) -> bool { + buf.len() > 3 && buf[0] == 0x28 && buf[1] == 0xB5 && buf[2] == 0x2F && buf[3] == 0xFD + } + + let buf = { + let mut b = [0; 270]; + // Reading errors will just make the inferring fail so its safe to ignore + let _ = std::fs::File::open(&path).map(|mut f| std::io::Read::read(&mut f, &mut b)); + b + }; + + use crate::extension::CompressionFormat::*; + if is_zip(&buf) { + Some(Extension::new(&[Zip], "zip")) + } else if is_tar(&buf) { + Some(Extension::new(&[Tar], "tar")) + } else if is_gz(&buf) { + Some(Extension::new(&[Gzip], "gz")) + } else if is_bz2(&buf) { + Some(Extension::new(&[Bzip], "bz2")) + } else if is_xz(&buf) { + Some(Extension::new(&[Lzma], "xz")) + } else if is_lz(&buf) { + Some(Extension::new(&[Lzma], "lz")) + } else if is_lz4(&buf) { + Some(Extension::new(&[Lz4], "lz4")) + } else if is_zst(&buf) { + Some(Extension::new(&[Zstd], "zst")) + } else { + None + } +} + /// Module with a list of bright colors. #[allow(dead_code)] pub mod colors { @@ -108,6 +180,8 @@ pub mod colors { color!(RED = "\u{1b}[38;5;9m"); color!(WHITE = "\u{1b}[38;5;15m"); color!(YELLOW = "\u{1b}[38;5;11m"); + // Requires true color support + color!(ORANGE = "\u{1b}[38;2;255;165;0m"); color!(STYLE_BOLD = "\u{1b}[1m"); color!(STYLE_RESET = "\u{1b}[0m"); color!(ALL_RESET = "\u{1b}[0;39m"); diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 1930cc2..4b3bee3 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -7,6 +7,8 @@ mod question_policy; pub use bytes::Bytes; pub use fs::{ cd_into_same_dir_as, colors, concatenate_list_of_os_str, create_dir_if_non_existent, dir_is_empty, - nice_directory_display, strip_cur_dir, to_utf, + nice_directory_display, strip_cur_dir, to_utf, try_infer, +}; +pub use question_policy::{ + create_or_ask_overwrite, user_wants_to_continue_decompressing, user_wants_to_overwrite, QuestionPolicy, }; -pub use question_policy::{create_or_ask_overwrite, user_wants_to_overwrite, QuestionPolicy}; diff --git a/src/utils/question_policy.rs b/src/utils/question_policy.rs index 41e9f2e..fe92d1a 100644 --- a/src/utils/question_policy.rs +++ b/src/utils/question_policy.rs @@ -53,3 +53,17 @@ pub fn create_or_ask_overwrite(path: &Path, question_policy: QuestionPolicy) -> Err(e) => Err(Error::from(e)), } } + +/// Check if QuestionPolicy flags were set, otherwise, ask the user if they want to continue decompressing. +pub fn user_wants_to_continue_decompressing(path: &Path, question_policy: QuestionPolicy) -> crate::Result { + match question_policy { + QuestionPolicy::AlwaysYes => Ok(true), + QuestionPolicy::AlwaysNo => Ok(false), + QuestionPolicy::Ask => { + let path = to_utf(strip_cur_dir(path)); + let path = Some(path.as_str()); + let placeholder = Some("FILE"); + Confirmation::new("Do you want to continue decompressing 'FILE'?", placeholder).ask(path) + } + } +} From 2d2a018cbe111c949e9c278b78cc6b4f0e445f31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Wed, 10 Nov 2021 05:46:44 -0300 Subject: [PATCH 2/5] Update LICENSE Adding copyright notice from "infer" crate --- LICENSE | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 66a271b..bd024e7 100644 --- a/LICENSE +++ b/LICENSE @@ -19,4 +19,8 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - + +Copyright notices from other projects: + +Copyright (c) 2019 Bojan +https://github.com/bojand/infer From 552096acf0349ba45c27ae88d8d578b51b5ecfa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Wed, 10 Nov 2021 06:03:46 -0300 Subject: [PATCH 3/5] Minor import and module exporting changes --- src/commands.rs | 6 +++--- src/utils/fs.rs | 10 +++++++--- src/utils/mod.rs | 9 ++------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/commands.rs b/src/commands.rs index f4bc513..688c91d 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -22,7 +22,7 @@ use crate::{ info, list::{self, ListOptions}, utils::{ - self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer, + self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer_extension, user_wants_to_continue_decompressing, }, warning, Opts, QuestionPolicy, Subcommand, @@ -495,7 +495,7 @@ fn check_mime_type( if format.is_empty() { // File with no extension // Try to detect it automatically and prompt the user about it - if let Some(detected_format) = try_infer(path) { + if let Some(detected_format) = try_infer_extension(path) { info!("Detected file: `{}` extension as `{}`", path.display(), detected_format); if user_wants_to_continue_decompressing(path, question_policy)? { format.push(detected_format); @@ -503,7 +503,7 @@ fn check_mime_type( return Ok(ControlFlow::Break(())); } } - } else if let Some(detected_format) = try_infer(path) { + } else if let Some(detected_format) = try_infer_extension(path) { // File ending with extension // Try to detect the extension and warn the user if it differs from the written one let outer_ext = format.iter().next().unwrap(); diff --git a/src/utils/fs.rs b/src/utils/fs.rs index 11332a9..86f49d4 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -4,6 +4,8 @@ use std::{ borrow::Cow, env, ffi::OsStr, + fs::ReadDir, + io::Read, path::{Component, Path, PathBuf}, }; @@ -13,7 +15,7 @@ use crate::{extension::Extension, info}; /// Checks given path points to an empty directory. pub fn dir_is_empty(dir_path: &Path) -> bool { - let is_empty = |mut rd: std::fs::ReadDir| rd.next().is_none(); + let is_empty = |mut rd: ReadDir| rd.next().is_none(); dir_path.read_dir().map(is_empty).unwrap_or_default() } @@ -82,7 +84,7 @@ pub fn nice_directory_display(os_str: impl AsRef) -> Cow<'static, str> { /// Try to detect the file extension by looking for known magic strings /// Source: https://en.wikipedia.org/wiki/List_of_file_signatures -pub fn try_infer(path: &Path) -> Option { +pub fn try_infer_extension(path: &Path) -> Option { fn is_zip(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x50 @@ -155,10 +157,12 @@ pub fn try_infer(path: &Path) -> Option { /// Module with a list of bright colors. #[allow(dead_code)] pub mod colors { + use std::env; + use once_cell::sync::Lazy; static DISABLE_COLORED_TEXT: Lazy = Lazy::new(|| { - std::env::var_os("NO_COLOR").is_some() || atty::isnt(atty::Stream::Stdout) || atty::isnt(atty::Stream::Stderr) + env::var_os("NO_COLOR").is_some() || atty::isnt(atty::Stream::Stdout) || atty::isnt(atty::Stream::Stderr) }); macro_rules! color { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 4b3bee3..8b147bf 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -5,10 +5,5 @@ mod fs; mod question_policy; pub use bytes::Bytes; -pub use fs::{ - cd_into_same_dir_as, colors, concatenate_list_of_os_str, create_dir_if_non_existent, dir_is_empty, - nice_directory_display, strip_cur_dir, to_utf, try_infer, -}; -pub use question_policy::{ - create_or_ask_overwrite, user_wants_to_continue_decompressing, user_wants_to_overwrite, QuestionPolicy, -}; +pub use fs::*; +pub use question_policy::*; From 16acb98b6ef427520313120071bedec7022bbc4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Wed, 10 Nov 2021 06:04:31 -0300 Subject: [PATCH 4/5] Early return when can't detect extension from magic numbers --- src/utils/fs.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/utils/fs.rs b/src/utils/fs.rs index 86f49d4..6d8856f 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -126,10 +126,16 @@ pub fn try_infer_extension(path: &Path) -> Option { } let buf = { - let mut b = [0; 270]; - // Reading errors will just make the inferring fail so its safe to ignore - let _ = std::fs::File::open(&path).map(|mut f| std::io::Read::read(&mut f, &mut b)); - b + let mut buf = [0; 270]; + + // Error cause will be ignored, so use std::fs instead of fs_err + let result = std::fs::File::open(&path).map(|mut file| file.read(&mut buf)); + + // In case of file open or read failure, could not infer a extension + if result.is_err() { + return None; + } + buf }; use crate::extension::CompressionFormat::*; From cbc87866faf30fb962ab362f04e9755db5193a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Wed, 10 Nov 2021 06:34:05 -0300 Subject: [PATCH 5/5] Fix zip magic numbers detection And reworked detection for other formats as well --- src/utils/fs.rs | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/src/utils/fs.rs b/src/utils/fs.rs index 6d8856f..efa81be 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -86,43 +86,30 @@ pub fn nice_directory_display(os_str: impl AsRef) -> Cow<'static, str> { /// Source: https://en.wikipedia.org/wiki/List_of_file_signatures pub fn try_infer_extension(path: &Path) -> Option { fn is_zip(buf: &[u8]) -> bool { - buf.len() > 3 - && buf[0] == 0x50 - && buf[1] == 0x4B - && (buf[2] == 0x3 || buf[2] == 0x5 || buf[2] == 0x7) - && (buf[3] == 0x4 || buf[3] == 0x6 || buf[3] == 0x8) + buf.len() >= 3 + && buf[..=1] == [0x50, 0x4B] + && (buf[2..=3] == [0x3, 0x4] || buf[2..=3] == [0x5, 0x6] || buf[2..=3] == [0x7, 0x8]) } fn is_tar(buf: &[u8]) -> bool { - buf.len() > 261 - && buf[257] == 0x75 - && buf[258] == 0x73 - && buf[259] == 0x74 - && buf[260] == 0x61 - && buf[261] == 0x72 + buf.len() > 261 && buf[257..=261] == [0x75, 0x73, 0x74, 0x61, 0x72] } fn is_gz(buf: &[u8]) -> bool { - buf.len() > 2 && buf[0] == 0x1F && buf[1] == 0x8B && buf[2] == 0x8 + buf.len() > 2 && buf[..=2] == [0x1F, 0x8B, 0x8] } fn is_bz2(buf: &[u8]) -> bool { - buf.len() > 2 && buf[0] == 0x42 && buf[1] == 0x5A && buf[2] == 0x68 + buf.len() > 2 && buf[..=2] == [0x42, 0x5A, 0x68] } fn is_xz(buf: &[u8]) -> bool { - buf.len() > 5 - && buf[0] == 0xFD - && buf[1] == 0x37 - && buf[2] == 0x7A - && buf[3] == 0x58 - && buf[4] == 0x5A - && buf[5] == 0x00 + buf.len() > 5 && buf[..=5] == [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00] } fn is_lz(buf: &[u8]) -> bool { - buf.len() > 3 && buf[0] == 0x4C && buf[1] == 0x5A && buf[2] == 0x49 && buf[3] == 0x50 + buf.len() > 3 && buf[..=3] == [0x4C, 0x5A, 0x49, 0x50] } fn is_lz4(buf: &[u8]) -> bool { - buf.len() > 3 && buf[0] == 0x04 && buf[1] == 0x22 && buf[2] == 0x4D && buf[3] == 0x18 + buf.len() > 3 && buf[..=3] == [0x04, 0x22, 0x4D, 0x18] } fn is_zst(buf: &[u8]) -> bool { - buf.len() > 3 && buf[0] == 0x28 && buf[1] == 0xB5 && buf[2] == 0x2F && buf[3] == 0xFD + buf.len() > 3 && buf[..=3] == [0x28, 0xB5, 0x2F, 0xFD] } let buf = {