Support decompressing stdin.

Fixes #687.

If "-" is passed as a filename, decompress data from stdin.

Currently `--format` must be passed as well, but as a next step,
we could try to infer the format from magic numbers.

As stdin is not connected to the terminal, we cannot prompt for Y/N
when warning about decompression in memory, for e.g. zip. Just default
to No, and require passing "-y" in these cases.

For zip, we have to buffer the whole stream in memory to seek into it,
just as we do with a chained decoder like `.zip.bz`.

The rar format requires an actual file (not an `impl Read`), so
we write a temp file that it can decode.

When decoding a single-file archive (e.g. file.bz), the output filename
is just `-`, since we don't know the original filename. I had to add
a bit of a hack to the tests to work around this. Another option
would be to interpret "-d" as a destination filename in this case.

When decoding a multi-file archive, I decided to unpack directly into
the destination directory, as this seemed like a better experience than
adding a top-level "-" folder inside the destination.
This commit is contained in:
Ryan Roden-Corrent 2024-07-14 15:32:02 -04:00 committed by João Marcos
parent 3867fa33e9
commit 77c1a4e9db
9 changed files with 149 additions and 27 deletions

View File

@ -35,6 +35,7 @@ Categories Used:
### Improvements
- Fix logging IO bottleneck [\#642](https://github.com/ouch-org/ouch/pull/642) ([AntoniosBarotsis](https://github.com/AntoniosBarotsis))
- Support decompression over stdin [\#692](https://github.com/ouch-org/ouch/pull/692) ([rcorre](https://github.com/rcorre))
## [0.5.1](https://github.com/ouch-org/ouch/compare/0.5.0...0.5.1)

View File

@ -77,7 +77,7 @@ pub enum Subcommand {
/// Decompresses one or more files, optionally into another folder
#[command(visible_alias = "d")]
Decompress {
/// Files to be decompressed
/// Files to be decompressed, or "-" for stdin
#[arg(required = true, num_args = 1.., value_hint = ValueHint::FilePath)]
files: Vec<PathBuf>,

View File

@ -11,7 +11,11 @@ use clap::Parser;
use fs_err as fs;
pub use self::args::{CliArgs, Subcommand};
use crate::{accessible::set_accessible, utils::FileVisibilityPolicy, QuestionPolicy};
use crate::{
accessible::set_accessible,
utils::{is_path_stdin, FileVisibilityPolicy},
QuestionPolicy,
};
impl CliArgs {
/// A helper method that calls `clap::Parser::parse`.
@ -47,5 +51,14 @@ impl CliArgs {
}
fn canonicalize_files(files: &[impl AsRef<Path>]) -> io::Result<Vec<PathBuf>> {
files.iter().map(fs::canonicalize).collect()
files
.iter()
.map(|f| {
if is_path_stdin(f.as_ref()) {
Ok(f.as_ref().to_path_buf())
} else {
fs::canonicalize(f)
}
})
.collect()
}

View File

@ -14,11 +14,15 @@ use crate::{
Extension,
},
utils::{
self, io::lock_and_flush_output_stdio, logger::info_accessible, nice_directory_display, user_wants_to_continue,
self, io::lock_and_flush_output_stdio, is_path_stdin, logger::info_accessible, nice_directory_display,
user_wants_to_continue,
},
QuestionAction, QuestionPolicy, BUFFER_CAPACITY,
};
trait ReadSeek: Read + io::Seek {}
impl<T: Read + io::Seek> ReadSeek for T {}
/// Decompress a file
///
/// File at input_file_path is opened for reading, example: "archive.tar.gz"
@ -34,7 +38,7 @@ pub fn decompress_file(
quiet: bool,
) -> crate::Result<()> {
assert!(output_dir.exists());
let reader = fs::File::open(input_file_path)?;
let input_is_stdin = is_path_stdin(input_file_path);
// Zip archives are special, because they require io::Seek, so it requires it's logic separated
// from decoder chaining.
@ -48,6 +52,14 @@ pub fn decompress_file(
..
}] = formats.as_slice()
{
let mut vec = vec![];
let reader: Box<dyn ReadSeek> = if input_is_stdin {
warn_user_about_loading_zip_in_memory();
io::copy(&mut io::stdin(), &mut vec)?;
Box::new(io::Cursor::new(vec))
} else {
Box::new(fs::File::open(input_file_path)?)
};
let zip_archive = zip::ZipArchive::new(reader)?;
let files_unpacked = if let ControlFlow::Continue(files) = smart_unpack(
|output_dir| crate::archive::zip::unpack_archive(zip_archive, output_dir, quiet),
@ -74,6 +86,11 @@ pub fn decompress_file(
}
// Will be used in decoder chaining
let reader: Box<dyn Read> = if input_is_stdin {
Box::new(io::stdin())
} else {
Box::new(fs::File::open(input_file_path)?)
};
let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader);
let mut reader: Box<dyn Read> = Box::new(reader);
@ -152,7 +169,7 @@ pub fn decompress_file(
#[cfg(feature = "unrar")]
Rar => {
type UnpackResult = crate::Result<usize>;
let unpack_fn: Box<dyn FnOnce(&Path) -> UnpackResult> = if formats.len() > 1 {
let unpack_fn: Box<dyn FnOnce(&Path) -> UnpackResult> = if formats.len() > 1 || input_is_stdin {
let mut temp_file = tempfile::NamedTempFile::new()?;
io::copy(&mut reader, &mut temp_file)?;
Box::new(move |output_dir| crate::archive::rar::unpack_archive(temp_file.path(), output_dir, quiet))
@ -236,25 +253,28 @@ fn smart_unpack(
let files = unpack_fn(temp_dir_path)?;
let root_contains_only_one_element = fs::read_dir(temp_dir_path)?.count() == 1;
if root_contains_only_one_element {
if is_path_stdin(output_file_path) || root_contains_only_one_element {
// Only one file in the root directory, so we can just move it to the output directory
let file = fs::read_dir(temp_dir_path)?.next().expect("item exists")?;
let file_path = file.path();
let file_name = file_path
.file_name()
.expect("Should be safe because paths in archives should not end with '..'");
let correct_path = output_dir.join(file_name);
// Before moving, need to check if a file with the same name already exists
if !utils::clear_path(&correct_path, question_policy)? {
return Ok(ControlFlow::Break(()));
}
fs::rename(&file_path, &correct_path)?;
// If we're decompressing an archive from stdin, we can also put the files directly in the output directory.
let files = fs::read_dir(temp_dir_path)?;
for file in files {
let file_path = file?.path();
let file_name = file_path
.file_name()
.expect("Should be safe because paths in archives should not end with '..'");
let correct_path = output_dir.join(file_name);
// Before moving, need to check if a file with the same name already exists
if !utils::clear_path(&correct_path, question_policy)? {
return Ok(ControlFlow::Break(()));
}
fs::rename(&file_path, &correct_path)?;
info_accessible(format!(
"Successfully moved {} to {}.",
nice_directory_display(&file_path),
nice_directory_display(&correct_path)
));
info_accessible(format!(
"Successfully moved {} to {}.",
nice_directory_display(&file_path),
nice_directory_display(&correct_path)
));
}
} else {
// Multiple files in the root directory, so:
// Rename the temporary directory to the archive name, which is output_file_path

View File

@ -15,6 +15,10 @@ use crate::{
QuestionPolicy,
};
pub fn is_path_stdin(path: &Path) -> bool {
path.as_os_str() == "-"
}
/// Remove `path` asking the user to overwrite if necessary.
///
/// * `Ok(true)` means the path is clear,

View File

@ -16,7 +16,8 @@ pub use formatting::{
nice_directory_display, pretty_format_list_of_paths, strip_cur_dir, to_utf, Bytes, EscapedPathDisplay,
};
pub use fs::{
cd_into_same_dir_as, clear_path, create_dir_if_non_existent, is_symlink, remove_file_or_dir, try_infer_extension,
cd_into_same_dir_as, clear_path, create_dir_if_non_existent, is_path_stdin, is_symlink, remove_file_or_dir,
try_infer_extension,
};
pub use question::{
ask_to_create_file, user_wants_to_continue, user_wants_to_overwrite, QuestionAction, QuestionPolicy,

View File

@ -5,7 +5,7 @@
use std::{
borrow::Cow,
io::{stdin, BufRead},
io::{stdin, BufRead, IsTerminal},
path::Path,
};
@ -121,6 +121,12 @@ impl<'a> Confirmation<'a> {
(Some(placeholder), Some(subs)) => Cow::Owned(self.prompt.replace(placeholder, subs)),
};
if !stdin().is_terminal() {
eprintln!("{}", message);
eprintln!("Pass --yes to proceed");
return Ok(false);
}
let _locks = lock_and_flush_output_stdio()?;
let mut stdin_lock = stdin().lock();

View File

@ -6,7 +6,7 @@ use std::{iter::once, path::PathBuf};
use fs_err as fs;
use parse_display::Display;
use proptest::sample::size_range;
use rand::{rngs::SmallRng, Rng, SeedableRng};
use rand::{rngs::SmallRng, Rng, RngCore as _, SeedableRng};
use tempfile::tempdir;
use test_strategy::{proptest, Arbitrary};
@ -128,6 +128,43 @@ fn single_file(
assert_same_directory(before, after, false);
}
/// Compress and decompress a single file over stdin.
#[proptest(cases = 200)]
fn single_file_stdin(
ext: Extension,
#[any(size_range(0..8).lift())] exts: Vec<FileExtension>,
#[cfg_attr(not(target_arch = "arm"), strategy(proptest::option::of(0i16..12)))]
// Decrease the value of --level flag for `arm` systems, because our GitHub
// Actions CI runs QEMU which makes the memory consumption higher.
#[cfg_attr(target_arch = "arm", strategy(proptest::option::of(0i16..8)))]
level: Option<i16>,
) {
let dir = tempdir().unwrap();
let dir = dir.path();
let before = &dir.join("before");
fs::create_dir(before).unwrap();
let before_file = &before.join("file");
let format = merge_extensions(ext, exts);
let archive = &dir.join(format!("file.{}", format));
let after = &dir.join("after");
write_random_content(
&mut fs::File::create(before_file).unwrap(),
&mut SmallRng::from_entropy(),
);
if let Some(level) = level {
ouch!("-A", "c", "-l", level.to_string(), before_file, archive);
} else {
ouch!("-A", "c", before_file, archive);
}
crate::utils::cargo_bin()
.args(["-A", "-y", "d", "-", "-d", after.to_str().unwrap(), "--format", &format])
.pipe_stdin(archive)
.unwrap()
.assert()
.success();
assert_same_directory(before, after, false);
}
/// Compress and decompress a directory with random content generated with create_random_files
///
/// This one runs only 50 times because there are only `.zip` and `.tar` to be tested, and
@ -173,3 +210,40 @@ fn unpack_rar() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
#[cfg(feature = "unrar")]
#[test]
fn unpack_rar_stdin() -> Result<(), Box<dyn std::error::Error>> {
fn test_unpack_rar_single(input: &std::path::Path, format: &str) -> Result<(), Box<dyn std::error::Error>> {
let dir = tempdir()?;
let dirpath = dir.path();
let unpacked_path = &dirpath.join("testfile.txt");
crate::utils::cargo_bin()
.args([
"-A",
"-y",
"d",
"-",
"-d",
dirpath.to_str().unwrap(),
"--format",
format,
])
.pipe_stdin(input)
.unwrap()
.assert()
.success();
let content = fs::read_to_string(unpacked_path)?;
assert_eq!(content, "Testing 123\n");
Ok(())
}
let mut datadir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR")?);
datadir.push("tests/data");
[("testfile.rar3.rar.gz", "rar.gz"), ("testfile.rar5.rar", "rar")]
.iter()
.try_for_each(|(path, format)| test_unpack_rar_single(&datadir.join(path), format))?;
Ok(())
}

View File

@ -73,7 +73,10 @@ pub fn assert_same_directory(x: impl Into<PathBuf>, y: impl Into<PathBuf>, prese
loop {
match (x.next(), y.next()) {
(Some(x), Some(y)) => {
assert_eq!(x.file_name(), y.file_name());
// If decompressing from stdin, the file name will be "-".
if x.file_name() != "-" && y.file_name() != "-" {
assert_eq!(x.file_name(), y.file_name());
}
let meta_x = x.metadata().unwrap();
let meta_y = y.metadata().unwrap();