Merge pull request #154 from sigmaSd/infer

Infer file extension when decompressing
This commit is contained in:
João Marcos Bezerra 2021-11-10 06:40:54 -03:00 committed by GitHub
commit 1026fa19fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 165 additions and 11 deletions

View File

@ -19,4 +19,8 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Copyright notices from other projects:
Copyright (c) 2019 Bojan
https://github.com/bojand/infer

View File

@ -4,6 +4,7 @@
use std::{
io::{self, BufReader, BufWriter, Read, Write},
ops::ControlFlow,
path::{Path, PathBuf},
};
@ -20,8 +21,11 @@ use crate::{
},
info,
list::{self, ListOptions},
utils::{self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf},
Opts, QuestionPolicy, Subcommand,
utils::{
self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf, try_infer_extension,
user_wants_to_continue_decompressing,
},
warning, Opts, QuestionPolicy, Subcommand,
};
// Used in BufReader and BufWriter to perform less syscalls
@ -170,6 +174,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
formats.push(file_formats);
}
if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
return Ok(());
}
let files_missing_format: Vec<PathBuf> = files
.iter()
.zip(&formats)
@ -208,6 +216,10 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
formats.push(file_formats);
}
if let ControlFlow::Break(_) = check_mime_type(&files, &mut formats, question_policy)? {
return Ok(());
}
let not_archives: Vec<PathBuf> = files
.iter()
.zip(&formats)
@ -473,3 +485,43 @@ fn list_archive_contents(
list::list_files(archive_path, files, list_options);
Ok(())
}
/// Cross-checks the extensions parsed from each file name against the format
/// inferred from the file contents.
///
/// For every `(path, format)` pair:
/// - no parsed extension, format inferred: report it and, if the user agrees,
///   record the inferred format in `format`;
/// - parsed extension and inferred format disagree: warn and ask the user
///   whether to go on;
/// - parsed extension but nothing inferred: only print an informational note.
///
/// Returns `ControlFlow::Break(())` as soon as the user declines to continue,
/// `ControlFlow::Continue(())` otherwise. Errors from the prompt are propagated.
fn check_mime_type(
    files: &[PathBuf],
    formats: &mut Vec<Vec<Extension>>,
    question_policy: QuestionPolicy,
) -> crate::Result<ControlFlow<()>> {
    for (path, format) in files.iter().zip(formats.iter_mut()) {
        // Inference is attempted exactly once per file, whatever the file name looks like.
        let inferred = try_infer_extension(path);

        match (format.is_empty(), inferred) {
            // No extension in the name, but the contents were recognised.
            (true, Some(detected_format)) => {
                info!("Detected file: `{}` extension as `{}`", path.display(), detected_format);
                if !user_wants_to_continue_decompressing(path, question_policy)? {
                    return Ok(ControlFlow::Break(()));
                }
                format.push(detected_format);
            }
            // No extension and nothing recognised: leave the entry untouched.
            (true, None) => {}
            // Extension present and contents recognised: make sure they agree.
            (false, Some(detected_format)) => {
                // NOTE(review): this compares against the *first* entry of `format`;
                // confirm the list is ordered outermost-first, otherwise the last
                // entry is the one that should be checked.
                let outer_ext = format.first().expect("`format` was checked to be non-empty");
                if outer_ext != &detected_format {
                    warning!(
                        "The file extension: `{}` differ from the detected extension: `{}`",
                        outer_ext,
                        detected_format
                    );
                    if !user_wants_to_continue_decompressing(path, question_policy)? {
                        return Ok(ControlFlow::Break(()));
                    }
                }
            }
            // Extension present but contents not recognised.
            // NOTE: if this turns out to produce no false positives, it can be
            // upgraded to a warning that asks the user whether to continue.
            (false, None) => {
                info!("Could not detect the extension of `{}`", path.display());
            }
        }
    }
    Ok(ControlFlow::Continue(()))
}

View File

@ -15,3 +15,19 @@ pub fn _info_helper() {
print!("{}[INFO]{} ", *YELLOW, *RESET);
}
/// Macro that prints [WARNING] messages, wraps [`println`].
///
/// Accepts the same arguments as `println!`. The "[WARNING] " prefix is
/// printed first by `_warning_helper`, then the formatted message follows on
/// the same line.
///
/// The expansion is a single block, so the macro can be used both as a
/// statement and in expression position (for example as a `match` arm).
#[macro_export]
macro_rules! warning {
    ($($arg:tt)*) => {{
        $crate::macros::_warning_helper();
        println!($($arg)*);
    }};
}
/// Helper to display "[INFO]", colored yellow
pub fn _warning_helper() {
use crate::utils::colors::{ORANGE, RESET};
print!("{}[WARNING]{} ", *ORANGE, *RESET);
}

View File

@ -4,16 +4,18 @@ use std::{
borrow::Cow,
env,
ffi::OsStr,
fs::ReadDir,
io::Read,
path::{Component, Path, PathBuf},
};
use fs_err as fs;
use crate::info;
use crate::{extension::Extension, info};
/// Checks whether the given path points to an empty directory.
///
/// Returns `false` when the directory cannot be read (for example when the
/// path does not exist or is not a directory).
pub fn dir_is_empty(dir_path: &Path) -> bool {
    match dir_path.read_dir() {
        // Empty means the listing yields no entry at all.
        Ok(mut entries) => entries.next().is_none(),
        Err(_) => false,
    }
}
@ -80,13 +82,80 @@ pub fn nice_directory_display(os_str: impl AsRef<OsStr>) -> Cow<'static, str> {
}
}
/// Try to detect the file extension by looking for known magic strings
/// Source: https://en.wikipedia.org/wiki/List_of_file_signatures
///
/// Reads at most the first 270 bytes of `path` and matches them against the
/// signatures of the supported formats. Only the bytes actually read are
/// inspected, so a file shorter than a signature can never match it.
///
/// Returns `None` when the file cannot be opened or read, or when no known
/// signature matches.
pub fn try_infer_extension(path: &Path) -> Option<Extension> {
    /// Number of leading bytes inspected; the tar signature ends at offset 262.
    const HEADER_LEN: usize = 270;

    fn is_zip(buf: &[u8]) -> bool {
        // "PK" followed by one of the three record markers.
        const ZIP_SIGNATURES: [[u8; 4]; 3] =
            [[0x50, 0x4B, 0x03, 0x04], [0x50, 0x4B, 0x05, 0x06], [0x50, 0x4B, 0x07, 0x08]];
        ZIP_SIGNATURES.iter().any(|signature| buf.starts_with(signature))
    }
    fn is_tar(buf: &[u8]) -> bool {
        // "ustar" at offset 257; `get` returns `None` for shorter inputs.
        buf.get(257..262) == Some(&b"ustar"[..])
    }
    fn is_gz(buf: &[u8]) -> bool {
        buf.starts_with(&[0x1F, 0x8B, 0x08])
    }
    fn is_bz2(buf: &[u8]) -> bool {
        buf.starts_with(b"BZh")
    }
    fn is_xz(buf: &[u8]) -> bool {
        buf.starts_with(&[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])
    }
    fn is_lz(buf: &[u8]) -> bool {
        buf.starts_with(b"LZIP")
    }
    fn is_lz4(buf: &[u8]) -> bool {
        buf.starts_with(&[0x04, 0x22, 0x4D, 0x18])
    }
    fn is_zst(buf: &[u8]) -> bool {
        buf.starts_with(&[0x28, 0xB5, 0x2F, 0xFD])
    }

    let buf = {
        let mut header = Vec::with_capacity(HEADER_LEN);
        // Error cause will be ignored, so use std::fs instead of fs_err
        let file = std::fs::File::open(path).ok()?;
        // In case of file open or read failure, could not infer an extension.
        // `take` + `read_to_end` keeps reading until the limit or EOF, so a
        // short read cannot truncate the header.
        file.take(HEADER_LEN as u64).read_to_end(&mut header).ok()?;
        header
    };

    use crate::extension::CompressionFormat::*;
    if is_zip(&buf) {
        Some(Extension::new(&[Zip], "zip"))
    } else if is_tar(&buf) {
        Some(Extension::new(&[Tar], "tar"))
    } else if is_gz(&buf) {
        Some(Extension::new(&[Gzip], "gz"))
    } else if is_bz2(&buf) {
        Some(Extension::new(&[Bzip], "bz2"))
    } else if is_xz(&buf) {
        Some(Extension::new(&[Lzma], "xz"))
    } else if is_lz(&buf) {
        Some(Extension::new(&[Lzma], "lz"))
    } else if is_lz4(&buf) {
        Some(Extension::new(&[Lz4], "lz4"))
    } else if is_zst(&buf) {
        Some(Extension::new(&[Zstd], "zst"))
    } else {
        None
    }
}
/// Module with a list of bright colors.
#[allow(dead_code)]
pub mod colors {
use std::env;
use once_cell::sync::Lazy;
static DISABLE_COLORED_TEXT: Lazy<bool> = Lazy::new(|| {
std::env::var_os("NO_COLOR").is_some() || atty::isnt(atty::Stream::Stdout) || atty::isnt(atty::Stream::Stderr)
env::var_os("NO_COLOR").is_some() || atty::isnt(atty::Stream::Stdout) || atty::isnt(atty::Stream::Stderr)
});
macro_rules! color {
@ -108,6 +177,8 @@ pub mod colors {
color!(RED = "\u{1b}[38;5;9m");
color!(WHITE = "\u{1b}[38;5;15m");
color!(YELLOW = "\u{1b}[38;5;11m");
// Requires true color support
color!(ORANGE = "\u{1b}[38;2;255;165;0m");
color!(STYLE_BOLD = "\u{1b}[1m");
color!(STYLE_RESET = "\u{1b}[0m");
color!(ALL_RESET = "\u{1b}[0;39m");

View File

@ -5,8 +5,5 @@ mod fs;
mod question_policy;
pub use bytes::Bytes;
pub use fs::{
cd_into_same_dir_as, colors, concatenate_list_of_os_str, create_dir_if_non_existent, dir_is_empty,
nice_directory_display, strip_cur_dir, to_utf,
};
pub use question_policy::{create_or_ask_overwrite, user_wants_to_overwrite, QuestionPolicy};
pub use fs::*;
pub use question_policy::*;

View File

@ -53,3 +53,17 @@ pub fn create_or_ask_overwrite(path: &Path, question_policy: QuestionPolicy) ->
Err(e) => Err(Error::from(e)),
}
}
/// Decides whether decompression of `path` should go on.
///
/// `QuestionPolicy::AlwaysYes` and `QuestionPolicy::AlwaysNo` answer without
/// any interaction; `QuestionPolicy::Ask` prompts the user through a
/// `Confirmation` and returns whatever that prompt returns.
pub fn user_wants_to_continue_decompressing(path: &Path, question_policy: QuestionPolicy) -> crate::Result<bool> {
    let answer = match question_policy {
        QuestionPolicy::AlwaysYes => true,
        QuestionPolicy::AlwaysNo => false,
        QuestionPolicy::Ask => {
            // The 'FILE' placeholder in the prompt is substituted with the path.
            let shown_path = to_utf(strip_cur_dir(path));
            let prompt = Confirmation::new("Do you want to continue decompressing 'FILE'?", Some("FILE"));
            return prompt.ask(Some(shown_path.as_str()));
        }
    };
    Ok(answer)
}