From 77c1a4e9db962a6c7e4d378c3a0533083ab62b78 Mon Sep 17 00:00:00 2001 From: Ryan Roden-Corrent Date: Sun, 14 Jul 2024 15:32:02 -0400 Subject: [PATCH] Support decompressing stdin. Fixes #687. If "-" is passed as a filename, decompress data from stdin. Currently `--format` must be passed as well, but as a next step, we could try to infer the format from magic numbers. As stdin is not connected to the terminal, we cannot prompt for Y/N when warning about decompression in memory, for e.g. zip. Just default to No, and require passing "-y" in these cases. For zip, we have to buffer the whole stream in memory to seek into it, just as we do with a chained decoder like `.zip.bz`. The rar format requires an actual file (not an `impl Read`), so we write a temp file that it can decode. When decoding a single-file archive (e.g. file.bz), the output filename is just `-`, since we don't know the original filename. I had to add a bit of a hack to the tests to work around this. Another option would be to interpret "-d" as a destination filename in this case. When decoding a multi-file archive, I decided to unpack directly into the destination directory, as this seemed like a better experience than adding a top-level "-" folder inside the destination. --- CHANGELOG.md | 1 + src/cli/args.rs | 2 +- src/cli/mod.rs | 17 ++++++++- src/commands/decompress.rs | 60 ++++++++++++++++++++---------- src/utils/fs.rs | 4 ++ src/utils/mod.rs | 3 +- src/utils/question.rs | 8 +++- tests/integration.rs | 76 +++++++++++++++++++++++++++++++++++++- tests/utils.rs | 5 ++- 9 files changed, 149 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c63056..7d6f909 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Categories Used: ### Improvements - Fix logging IO bottleneck [\#642](https://github.com/ouch-org/ouch/pull/642) ([AntoniosBarotsis](https://github.com/AntoniosBarotsis)) +- Support decompression over stdin [\#692](https://github.com/ouch-org/ouch/pull/692) ([rcorre](https://github.com/rcorre)) ## [0.5.1](https://github.com/ouch-org/ouch/compare/0.5.0...0.5.1) diff --git a/src/cli/args.rs b/src/cli/args.rs index 85712a4..9826c78 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -77,7 +77,7 @@ pub enum Subcommand { /// Decompresses one or more files, optionally into another folder #[command(visible_alias = "d")] Decompress { - /// Files to be decompressed + /// Files to be decompressed, or "-" for stdin #[arg(required = true, num_args = 1.., value_hint = ValueHint::FilePath)] files: Vec, diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 69bf213..ce6b5d5 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -11,7 +11,11 @@ use clap::Parser; use fs_err as fs; pub use self::args::{CliArgs, Subcommand}; -use crate::{accessible::set_accessible, utils::FileVisibilityPolicy, QuestionPolicy}; +use crate::{ + accessible::set_accessible, + utils::{is_path_stdin, FileVisibilityPolicy}, + QuestionPolicy, +}; impl CliArgs { /// A helper method that calls `clap::Parser::parse`. @@ -47,5 +51,14 @@ impl CliArgs { } fn canonicalize_files(files: &[impl AsRef]) -> io::Result> { - files.iter().map(fs::canonicalize).collect() + files + .iter() + .map(|f| { + if is_path_stdin(f.as_ref()) { + Ok(f.as_ref().to_path_buf()) + } else { + fs::canonicalize(f) + } + }) + .collect() } diff --git a/src/commands/decompress.rs b/src/commands/decompress.rs index 57ed969..5d4e8c7 100644 --- a/src/commands/decompress.rs +++ b/src/commands/decompress.rs @@ -14,11 +14,15 @@ use crate::{ Extension, }, utils::{ - self, io::lock_and_flush_output_stdio, logger::info_accessible, nice_directory_display, user_wants_to_continue, + self, io::lock_and_flush_output_stdio, is_path_stdin, logger::info_accessible, nice_directory_display, + user_wants_to_continue, }, QuestionAction, QuestionPolicy, BUFFER_CAPACITY, }; +trait ReadSeek: Read + io::Seek {} +impl ReadSeek for T {} + /// Decompress a file /// /// File at input_file_path is opened for reading, example: "archive.tar.gz" @@ -34,7 +38,7 @@ pub fn decompress_file( quiet: bool, ) -> crate::Result<()> { assert!(output_dir.exists()); - let reader = fs::File::open(input_file_path)?; + let input_is_stdin = is_path_stdin(input_file_path); // Zip archives are special, because they require io::Seek, so it requires it's logic separated // from decoder chaining. @@ -48,6 +52,14 @@ pub fn decompress_file( .. }] = formats.as_slice() { + let mut vec = vec![]; + let reader: Box = if input_is_stdin { + warn_user_about_loading_zip_in_memory(); + io::copy(&mut io::stdin(), &mut vec)?; + Box::new(io::Cursor::new(vec)) + } else { + Box::new(fs::File::open(input_file_path)?) + }; let zip_archive = zip::ZipArchive::new(reader)?; let files_unpacked = if let ControlFlow::Continue(files) = smart_unpack( |output_dir| crate::archive::zip::unpack_archive(zip_archive, output_dir, quiet), @@ -74,6 +86,11 @@ pub fn decompress_file( } // Will be used in decoder chaining + let reader: Box = if input_is_stdin { + Box::new(io::stdin()) + } else { + Box::new(fs::File::open(input_file_path)?) + }; let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader); let mut reader: Box = Box::new(reader); @@ -152,7 +169,7 @@ pub fn decompress_file( #[cfg(feature = "unrar")] Rar => { type UnpackResult = crate::Result; - let unpack_fn: Box UnpackResult> = if formats.len() > 1 { + let unpack_fn: Box UnpackResult> = if formats.len() > 1 || input_is_stdin { let mut temp_file = tempfile::NamedTempFile::new()?; io::copy(&mut reader, &mut temp_file)?; Box::new(move |output_dir| crate::archive::rar::unpack_archive(temp_file.path(), output_dir, quiet)) @@ -236,25 +253,28 @@ fn smart_unpack( let files = unpack_fn(temp_dir_path)?; let root_contains_only_one_element = fs::read_dir(temp_dir_path)?.count() == 1; - if root_contains_only_one_element { + if is_path_stdin(output_file_path) || root_contains_only_one_element { // Only one file in the root directory, so we can just move it to the output directory - let file = fs::read_dir(temp_dir_path)?.next().expect("item exists")?; - let file_path = file.path(); - let file_name = file_path - .file_name() - .expect("Should be safe because paths in archives should not end with '..'"); - let correct_path = output_dir.join(file_name); - // Before moving, need to check if a file with the same name already exists - if !utils::clear_path(&correct_path, question_policy)? { - return Ok(ControlFlow::Break(())); - } - fs::rename(&file_path, &correct_path)?; + // If we're decompressing an archive from stdin, we can also put the files directly in the output directory. + let files = fs::read_dir(temp_dir_path)?; + for file in files { + let file_path = file?.path(); + let file_name = file_path + .file_name() + .expect("Should be safe because paths in archives should not end with '..'"); + let correct_path = output_dir.join(file_name); + // Before moving, need to check if a file with the same name already exists + if !utils::clear_path(&correct_path, question_policy)? { + return Ok(ControlFlow::Break(())); + } + fs::rename(&file_path, &correct_path)?; - info_accessible(format!( - "Successfully moved {} to {}.", - nice_directory_display(&file_path), - nice_directory_display(&correct_path) - )); + info_accessible(format!( + "Successfully moved {} to {}.", + nice_directory_display(&file_path), + nice_directory_display(&correct_path) + )); + } } else { // Multiple files in the root directory, so: // Rename the temporary directory to the archive name, which is output_file_path diff --git a/src/utils/fs.rs b/src/utils/fs.rs index c0eb385..2e4f766 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -15,6 +15,10 @@ use crate::{ QuestionPolicy, }; +pub fn is_path_stdin(path: &Path) -> bool { + path.as_os_str() == "-" +} + /// Remove `path` asking the user to overwrite if necessary. /// /// * `Ok(true)` means the path is clear, diff --git a/src/utils/mod.rs b/src/utils/mod.rs index dce133d..e2726f7 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -16,7 +16,8 @@ pub use formatting::{ nice_directory_display, pretty_format_list_of_paths, strip_cur_dir, to_utf, Bytes, EscapedPathDisplay, }; pub use fs::{ - cd_into_same_dir_as, clear_path, create_dir_if_non_existent, is_symlink, remove_file_or_dir, try_infer_extension, + cd_into_same_dir_as, clear_path, create_dir_if_non_existent, is_path_stdin, is_symlink, remove_file_or_dir, + try_infer_extension, }; pub use question::{ ask_to_create_file, user_wants_to_continue, user_wants_to_overwrite, QuestionAction, QuestionPolicy, diff --git a/src/utils/question.rs b/src/utils/question.rs index f828ee8..0712203 100644 --- a/src/utils/question.rs +++ b/src/utils/question.rs @@ -5,7 +5,7 @@ use std::{ borrow::Cow, - io::{stdin, BufRead}, + io::{stdin, BufRead, IsTerminal}, path::Path, }; @@ -121,6 +121,12 @@ impl<'a> Confirmation<'a> { (Some(placeholder), Some(subs)) => Cow::Owned(self.prompt.replace(placeholder, subs)), }; + if !stdin().is_terminal() { + eprintln!("{}", message); + eprintln!("Pass --yes to proceed"); + return Ok(false); + } + let _locks = lock_and_flush_output_stdio()?; let mut stdin_lock = stdin().lock(); diff --git a/tests/integration.rs b/tests/integration.rs index 233e2fb..04adc55 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -6,7 +6,7 @@ use std::{iter::once, path::PathBuf}; use fs_err as fs; use parse_display::Display; use proptest::sample::size_range; -use rand::{rngs::SmallRng, Rng, SeedableRng}; +use rand::{rngs::SmallRng, Rng, RngCore as _, SeedableRng}; use tempfile::tempdir; use test_strategy::{proptest, Arbitrary}; @@ -128,6 +128,43 @@ fn single_file( assert_same_directory(before, after, false); } +/// Compress and decompress a single file over stdin. +#[proptest(cases = 200)] +fn single_file_stdin( + ext: Extension, + #[any(size_range(0..8).lift())] exts: Vec, + #[cfg_attr(not(target_arch = "arm"), strategy(proptest::option::of(0i16..12)))] + // Decrease the value of --level flag for `arm` systems, because our GitHub + // Actions CI runs QEMU which makes the memory consumption higher. + #[cfg_attr(target_arch = "arm", strategy(proptest::option::of(0i16..8)))] + level: Option, +) { + let dir = tempdir().unwrap(); + let dir = dir.path(); + let before = &dir.join("before"); + fs::create_dir(before).unwrap(); + let before_file = &before.join("file"); + let format = merge_extensions(ext, exts); + let archive = &dir.join(format!("file.{}", format)); + let after = &dir.join("after"); + write_random_content( + &mut fs::File::create(before_file).unwrap(), + &mut SmallRng::from_entropy(), + ); + if let Some(level) = level { + ouch!("-A", "c", "-l", level.to_string(), before_file, archive); + } else { + ouch!("-A", "c", before_file, archive); + } + crate::utils::cargo_bin() + .args(["-A", "-y", "d", "-", "-d", after.to_str().unwrap(), "--format", &format]) + .pipe_stdin(archive) + .unwrap() + .assert() + .success(); + assert_same_directory(before, after, false); +} + /// Compress and decompress a directory with random content generated with create_random_files /// /// This one runs only 50 times because there are only `.zip` and `.tar` to be tested, and @@ -173,3 +210,40 @@ fn unpack_rar() -> Result<(), Box> { Ok(()) } + +#[cfg(feature = "unrar")] +#[test] +fn unpack_rar_stdin() -> Result<(), Box> { + fn test_unpack_rar_single(input: &std::path::Path, format: &str) -> Result<(), Box> { + let dir = tempdir()?; + let dirpath = dir.path(); + let unpacked_path = &dirpath.join("testfile.txt"); + crate::utils::cargo_bin() + .args([ + "-A", + "-y", + "d", + "-", + "-d", + dirpath.to_str().unwrap(), + "--format", + format, + ]) + .pipe_stdin(input) + .unwrap() + .assert() + .success(); + let content = fs::read_to_string(unpacked_path)?; + assert_eq!(content, "Testing 123\n"); + + Ok(()) + } + + let mut datadir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR")?); + datadir.push("tests/data"); + [("testfile.rar3.rar.gz", "rar.gz"), ("testfile.rar5.rar", "rar")] + .iter() + .try_for_each(|(path, format)| test_unpack_rar_single(&datadir.join(path), format))?; + + Ok(()) +} diff --git a/tests/utils.rs b/tests/utils.rs index bd5fc34..4ad40d0 100644 --- a/tests/utils.rs +++ b/tests/utils.rs @@ -73,7 +73,10 @@ pub fn assert_same_directory(x: impl Into, y: impl Into, prese loop { match (x.next(), y.next()) { (Some(x), Some(y)) => { - assert_eq!(x.file_name(), y.file_name()); + // If decompressing from stdin, the file name will be "-". + if x.file_name() != "-" && y.file_name() != "-" { + assert_eq!(x.file_name(), y.file_name()); + } let meta_x = x.metadata().unwrap(); let meta_y = y.metadata().unwrap();