From 97b4356aa8e27a1253de828357380553ef606b41 Mon Sep 17 00:00:00 2001 From: Flat Date: Fri, 24 Nov 2023 19:16:56 -0500 Subject: [PATCH] feat: implement 7zip support for compression and decompression This also fixes symlink canonicalization for Windows and fixes UI tests on Windows. --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/archive/sevenz.rs | 160 +++++++++++++++++++++++++++++++------ src/archive/zip.rs | 2 +- src/commands/compress.rs | 16 +++- src/commands/decompress.rs | 15 +++- src/commands/mod.rs | 10 +++ src/main.rs | 2 +- tests/ui.rs | 17 ++++ 9 files changed, 196 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6b5cc71..0f0ac61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,7 +827,7 @@ dependencies = [ "clap", "clap_complete", "clap_mangen", - "filetime", + "filetime_creation", "flate2", "fs-err", "gzp", diff --git a/Cargo.toml b/Cargo.toml index c31fbc4..4c195b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ atty = "0.2.14" bstr = { version = "1.8.0", default-features = false, features = ["std"] } bzip2 = "0.4.4" clap = { version = "4.4.8", features = ["derive", "env"] } -filetime = "0.2.22" +filetime_creation = "0.1" flate2 = { version = "1.0.28", default-features = false } fs-err = "2.11.0" gzp = { version = "0.11.3", default-features = false, features = ["snappy_default"] } diff --git a/src/archive/sevenz.rs b/src/archive/sevenz.rs index 4dbf639..7269d16 100644 --- a/src/archive/sevenz.rs +++ b/src/archive/sevenz.rs @@ -1,36 +1,150 @@ //! 
SevenZip archive format compress function -use std::path::{Path, PathBuf}; +use std::{ + env, + fs::File, + path::{Path, PathBuf}, io::{Write, Seek, Read}, +}; -use crate::utils::strip_cur_dir; +use same_file::Handle; -pub fn compress_sevenz(files: Vec, output_path: &Path) -> crate::Result { - let mut writer = sevenz_rust::SevenZWriter::create(output_path).map_err(crate::Error::SevenzipError)?; +use crate::{ + info, + utils::{self, cd_into_same_dir_as, EscapedPathDisplay, FileVisibilityPolicy, Bytes}, + warning, +}; - for filep in files.iter() { - writer - .push_archive_entry::( - sevenz_rust::SevenZArchiveEntry::from_path( - filep, - strip_cur_dir(filep) - .as_os_str() - .to_str() - .unwrap() - .to_string(), - ), - None, - ) - .map_err(crate::Error::SevenzipError)?; +pub fn compress_sevenz( + files: &[PathBuf], + output_path: &Path, + writer: W, + file_visibility_policy: FileVisibilityPolicy, + quiet: bool, +) -> crate::Result +where +W: Write + Seek { + + let mut writer = sevenz_rust::SevenZWriter::new(writer).map_err(crate::Error::SevenzipError)?; + let output_handle = Handle::from_path(output_path); + for filename in files { + let previous_location = cd_into_same_dir_as(filename)?; + + // Safe unwrap, input shall be treated before + let filename = filename.file_name().unwrap(); + + for entry in file_visibility_policy.build_walker(filename) { + let entry = entry?; + let path = entry.path(); + + // If the output_path is the same as the input file, warn the user and skip the input (in order to avoid compression recursion) + if let Ok(ref handle) = output_handle { + if matches!(Handle::from_path(path), Ok(x) if &x == handle) { + warning!( + "The output file and the input file are the same: `{}`, skipping...", + output_path.display() + ); + continue; + } + } + + // This is printed for every file in `input_filenames` and has + // little importance for most users, but would generate lots of + // spoken text for users using screen readers, braille displays + // and so 
on + if !quiet { + info!(inaccessible, "Compressing '{}'.", EscapedPathDisplay::new(path)); + } + + let metadata = match path.metadata() { + Ok(metadata) => metadata, + Err(e) => { + if e.kind() == std::io::ErrorKind::NotFound && utils::is_symlink(path) { + // This path is for a broken symlink + // We just ignore it + continue; + } + return Err(e.into()); + } + }; + + if metadata.is_dir() { + writer + .push_archive_entry::( + sevenz_rust::SevenZArchiveEntry::from_path(path, path.to_str().unwrap().to_owned()), + None, + ) + .map_err(crate::Error::SevenzipError)?; + } else { + let reader = File::open(path)?; + writer + .push_archive_entry::( + sevenz_rust::SevenZArchiveEntry::from_path(path, path.to_str().unwrap().to_owned()), + Some(reader), + ) + .map_err(crate::Error::SevenzipError)?; + } + } + + env::set_current_dir(previous_location)?; } - writer.finish()?; - Ok(true) + let bytes = writer.finish()?; + Ok(bytes) } -pub fn decompress_sevenz(input_file_path: &Path, output_path: &Path) -> crate::Result { +pub fn decompress_sevenz(reader: R, output_path: &Path, quiet: bool) -> crate::Result +where R: Read+ Seek { let mut count: usize = 0; - sevenz_rust::decompress_file_with_extract_fn(input_file_path, output_path, |entry, reader, dest| { + sevenz_rust::decompress_with_extract_fn(reader, output_path, |entry, reader, dest| { count += 1; - sevenz_rust::default_entry_extract_fn(entry, reader, dest) + // Manually handle writing all files from 7z archive, due to library excluding empty files + use std::io::BufWriter; + + use filetime_creation as ft; + + let file_path = output_path.join(entry.name()); + + if entry.is_directory() { + // This is printed for every file in the archive and has little + // importance for most users, but would generate lots of + // spoken text for users using screen readers, braille displays + // and so on + if !quiet { + info!(inaccessible, "File {} extracted to \"{}\"", entry.name(), file_path.display()); + } + let dir = dest; + if !dir.exists() 
{ + std::fs::create_dir_all(dir)?; + } + } else { + // same reason is in _is_dir: long, often not needed text + if !quiet { + info!( + inaccessible, + "{:?} extracted. ({})", + file_path.display(), + Bytes::new(entry.size()), + ); + } + let path = dest; + path.parent().and_then(|p| { + if !p.exists() { + std::fs::create_dir_all(p).ok() + } else { + None + } + }); + let file = File::create(path)?; + let mut writer = BufWriter::new(file); + std::io::copy(reader, &mut writer)?; + ft::set_file_handle_times( + writer.get_ref(), + Some(ft::FileTime::from_system_time(entry.access_date().into())), + Some(ft::FileTime::from_system_time(entry.last_modified_date().into())), + Some(ft::FileTime::from_system_time(entry.creation_date().into())), + ) + .unwrap_or_default(); + } + Ok(true) }) .map_err(crate::Error::SevenzipError)?; Ok(count) diff --git a/src/archive/zip.rs b/src/archive/zip.rs index 60cc440..4de1af9 100644 --- a/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -10,7 +10,7 @@ use std::{ thread, }; -use filetime::{set_file_mtime, FileTime}; +use filetime_creation::{set_file_mtime, FileTime}; use fs_err as fs; use same_file::Handle; use time::OffsetDateTime; diff --git a/src/commands/compress.rs b/src/commands/compress.rs index bd2a91b..c73191d 100644 --- a/src/commands/compress.rs +++ b/src/commands/compress.rs @@ -13,6 +13,8 @@ use crate::{ QuestionAction, QuestionPolicy, BUFFER_CAPACITY, }; +use super::warn_user_about_loading_sevenz_in_memory; + /// Compress files into `output_file`. /// /// # Arguments: @@ -127,7 +129,19 @@ pub fn compress_files( return Ok(false); }, SevenZip => { - archive::sevenz::compress_sevenz(files, output_path)?; + + if !formats.is_empty() { + warn_user_about_loading_sevenz_in_memory(); + + if !user_wants_to_continue(output_path, question_policy, QuestionAction::Compression)? 
{ + return Ok(false); + } + } + + let mut vec_buffer = Cursor::new(vec![]); + archive::sevenz::compress_sevenz(&files, output_path, &mut vec_buffer, file_visibility_policy, quiet)?; + vec_buffer.rewind()?; + io::copy(&mut vec_buffer, &mut writer)?; } } diff --git a/src/commands/decompress.rs b/src/commands/decompress.rs index 9f70433..b7cd0b3 100644 --- a/src/commands/decompress.rs +++ b/src/commands/decompress.rs @@ -7,7 +7,7 @@ use std::{ use fs_err as fs; use crate::{ - commands::warn_user_about_loading_zip_in_memory, + commands::{warn_user_about_loading_zip_in_memory, warn_user_about_loading_sevenz_in_memory}, extension::{ split_first_compression_format, CompressionFormat::{self, *}, @@ -165,8 +165,19 @@ pub fn decompress_file( } }, SevenZip => { + if formats.len() > 1 { + warn_user_about_loading_sevenz_in_memory(); + + if !user_wants_to_continue(input_file_path, question_policy, QuestionAction::Decompression)? { + return Ok(()); + } + } + + let mut vec = vec![]; + io::copy(&mut reader, &mut vec)?; + if let ControlFlow::Continue(files) = smart_unpack( - |output_dir| crate::archive::sevenz::decompress_sevenz(input_file_path, output_dir), + |output_dir| crate::archive::sevenz::decompress_sevenz(io::Cursor::new(vec), output_dir, quiet), output_dir, &output_file_path, question_policy, diff --git a/src/commands/mod.rs b/src/commands/mod.rs index e5be233..b9763d1 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -31,6 +31,16 @@ fn warn_user_about_loading_zip_in_memory() { warning!("{}", ZIP_IN_MEMORY_LIMITATION_WARNING); } +/// Warn the user that (de)compressing this .7z archive might freeze their system. 
+fn warn_user_about_loading_sevenz_in_memory() { + const SEVENZ_IN_MEMORY_LIMITATION_WARNING: &str = "\n\ + \tThe format '.7z' is limited and cannot be (de)compressed using encoding streams.\n\ + \tWhen using '.7z' with other formats, (de)compression must be done in-memory\n\ + \tCareful, you might run out of RAM if the archive is too large!"; + + warning!("{}", SEVENZ_IN_MEMORY_LIMITATION_WARNING); +} + /// This function checks what command needs to be run and performs A LOT of ahead-of-time checks /// to assume everything is OK. /// diff --git a/src/main.rs b/src/main.rs index 809c4e6..060177d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,7 @@ use utils::{QuestionAction, QuestionPolicy}; const BUFFER_CAPACITY: usize = 1024 * 32; /// Current directory or empty directory -static CURRENT_DIRECTORY: Lazy = Lazy::new(|| env::current_dir().unwrap_or_default()); +static CURRENT_DIRECTORY: Lazy = Lazy::new(|| std::fs::canonicalize(env::current_dir().unwrap_or_default()).unwrap_or_default()); /// The status code returned from `ouch` on error pub const EXIT_FAILURE: i32 = libc::EXIT_FAILURE; diff --git a/tests/ui.rs b/tests/ui.rs index c72f052..eee6e33 100644 --- a/tests/ui.rs +++ b/tests/ui.rs @@ -62,8 +62,12 @@ fn ui_test_err_compress_missing_extension() { let (_dropper, dir) = testdir().unwrap(); // prepare + #[cfg(not(windows))] run_in(dir, "touch", "input").unwrap(); + #[cfg(windows)] + run_in(dir, "cmd", "/C copy nul input").unwrap(); + ui!(run_ouch("ouch compress input output", dir)); } @@ -71,8 +75,16 @@ fn ui_test_err_compress_missing_extension() { fn ui_test_err_decompress_missing_extension() { let (_dropper, dir) = testdir().unwrap(); + #[cfg(not(windows))] run_in(dir, "touch", "a b.unknown").unwrap(); + #[cfg(windows)] + run_in(dir, "cmd", "/C copy nul a").unwrap(); + + #[cfg(windows)] + run_in(dir, "cmd", "/C copy nul b.unknown").unwrap(); + + ui!(run_ouch("ouch decompress a", dir)); ui!(run_ouch("ouch decompress a b.unknown", dir)); 
ui!(run_ouch("ouch decompress b.unknown", dir)); @@ -92,8 +104,12 @@ fn ui_test_ok_compress() { let (_dropper, dir) = testdir().unwrap(); // prepare + #[cfg(not(windows))] run_in(dir, "touch", "input").unwrap(); + #[cfg(windows)] + run_in(dir, "cmd", "/C copy nul input").unwrap(); + ui!(run_ouch("ouch compress input output.zip", dir)); ui!(run_ouch("ouch compress input output.gz", dir)); } @@ -103,6 +119,7 @@ fn ui_test_ok_decompress() { let (_dropper, dir) = testdir().unwrap(); // prepare + #[cfg(not(windows))] run_in(dir, "touch", "input").unwrap(); run_ouch("ouch compress input output.zst", dir);