From 30ebcf4f9e45558bd06c11f1ed3b31f132f159ba Mon Sep 17 00:00:00 2001 From: Anton Hermann Date: Sun, 31 Oct 2021 18:18:50 +0100 Subject: [PATCH] Implement command 'list' to show archive contents --- src/archive/tar.rs | 13 +++++ src/archive/zip.rs | 17 +++++++ src/cli.rs | 4 +- src/commands.rs | 117 +++++++++++++++++++++++++++++++++++++++++++++ src/extension.rs | 5 ++ src/lib.rs | 1 + src/list.rs | 28 +++++++++++ src/opts.rs | 11 +++++ 8 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 src/list.rs diff --git a/src/archive/tar.rs b/src/archive/tar.rs index 4c0d96e..eec196e 100644 --- a/src/archive/tar.rs +++ b/src/archive/tar.rs @@ -13,6 +13,7 @@ use walkdir::WalkDir; use crate::{ error::FinalError, info, + list::FileInArchive, utils::{self, Bytes}, QuestionPolicy, }; @@ -42,6 +43,18 @@ pub fn unpack_archive( Ok(files_unpacked) } +pub fn list_archive(reader: Box) -> crate::Result> { + let mut archive = tar::Archive::new(reader); + + let mut files = vec![]; + for file in archive.entries()? 
{ + let file = file?; + + files.push(FileInArchive { path: file.path()?.into_owned() }); + } + + Ok(files) +} pub fn build_archive_from_paths(input_filenames: &[PathBuf], writer: W) -> crate::Result where diff --git a/src/archive/zip.rs b/src/archive/zip.rs index a43af53..d0a3ffd 100644 --- a/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -13,6 +13,7 @@ use zip::{self, read::ZipFile, ZipArchive}; use crate::{ info, + list::FileInArchive, utils::{self, dir_is_empty, strip_cur_dir, Bytes}, QuestionPolicy, }; @@ -73,6 +74,22 @@ where Ok(unpacked_files) } +pub fn list_archive(mut archive: ZipArchive) -> crate::Result> +where + R: Read + Seek, +{ + let mut files = vec![]; + for idx in 0..archive.len() { + let file = archive.by_index(idx)?; + let path = match file.enclosed_name() { + Some(path) => path.to_owned(), + None => continue, + }; + files.push(FileInArchive { path }); + } + Ok(files) +} + pub fn build_archive_from_paths(input_filenames: &[PathBuf], writer: W) -> crate::Result where W: Write + Seek, diff --git a/src/cli.rs b/src/cli.rs index 4ef39ec..b9051b0 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -16,7 +16,9 @@ impl Opts { pub fn parse_args() -> crate::Result<(Self, QuestionPolicy)> { let mut opts: Self = Self::parse(); - let (Subcommand::Compress { files, .. } | Subcommand::Decompress { files, .. }) = &mut opts.cmd; + let (Subcommand::Compress { files, .. } + | Subcommand::Decompress { files, .. } + | Subcommand::List { archives: files, .. 
}) = &mut opts.cmd; *files = canonicalize_files(files)?; let skip_questions_positively = if opts.yes { diff --git a/src/commands.rs b/src/commands.rs index c5d2875..bad5775 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -18,6 +18,7 @@ use crate::{ CompressionFormat::{self, *}, }, info, + list::{self, ListOptions}, utils::{self, dir_is_empty, nice_directory_display, to_utf}, Error, Opts, QuestionPolicy, Subcommand, }; @@ -178,6 +179,40 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> { decompress_file(input_path, formats, output_dir, file_name, question_policy)?; } } + Subcommand::List { archives: files, tree } => { + let mut formats = vec![]; + + for path in files.iter() { + let (_, file_formats) = extension::separate_known_extensions_from_name(path); + formats.push(file_formats); + } + + let not_archives: Vec = files + .iter() + .zip(&formats) + .filter(|(_, formats)| formats.is_empty() || !formats[0].is_archive()) + .map(|(path, _)| path.clone()) + .collect(); + + // Error + if !not_archives.is_empty() { + eprintln!("Some file you asked ouch to list the contents of is not an archive."); + for file in &not_archives { + eprintln!("Could not list {}.", to_utf(file)); + } + todo!( + "Dev note: add this error variant and pass the Vec to it, all the files \ + lacking extension shall be shown: {:#?}.", + not_archives + ); + } + + let list_options = ListOptions { tree }; + + for (archive_path, formats) in files.iter().zip(formats) { + list_archive_contents(archive_path, formats, list_options)?; + } + } } Ok(()) } @@ -369,3 +404,85 @@ fn decompress_file( Ok(()) } + +// File at archive_path is opened for reading, example: "archive.tar.gz" +// formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order) +fn list_archive_contents( + archive_path: &Path, + formats: Vec, + list_options: ListOptions, +) -> crate::Result<()> { + // TODO: improve error message + let reader = 
fs::File::open(&archive_path)?; + + // Zip archives are special, because they require io::Seek, so their logic has to be separated + // from decoder chaining. + // + // This is the only case where we can read and list it directly, without having to do + // in-memory decompression/copying first. + // + // Any other Zip decompression done can take up the whole RAM and freeze ouch. + if let [Zip] = *formats.as_slice() { + let zip_archive = zip::ZipArchive::new(reader)?; + let files = crate::archive::zip::list_archive(zip_archive)?; + list::list_files(files, list_options); + return Ok(()); + } + + // Will be used in decoder chaining + let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader); + let mut reader: Box = Box::new(reader); + + // Grab previous decoder and wrap it inside of a new one + let chain_reader_decoder = |format: &CompressionFormat, decoder: Box| -> crate::Result> { + let decoder: Box = match format { + Gzip => Box::new(flate2::read::GzDecoder::new(decoder)), + Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)), + Lzma => Box::new(xz2::read::XzDecoder::new(decoder)), + Zstd => Box::new(zstd::stream::Decoder::new(decoder)?), + _ => unreachable!(), + }; + Ok(decoder) + }; + + for format in formats.iter().skip(1).rev() { + reader = chain_reader_decoder(format, reader)?; + } + + let files = match formats[0] { + Tar => crate::archive::tar::list_archive(reader)?, + Tgz => { + let reader = chain_reader_decoder(&Gzip, reader)?; + crate::archive::tar::list_archive(reader)? + } + Tbz => { + let reader = chain_reader_decoder(&Bzip, reader)?; + crate::archive::tar::list_archive(reader)? + } + Tlzma => { + let reader = chain_reader_decoder(&Lzma, reader)?; + crate::archive::tar::list_archive(reader)? + } + Tzst => { + let reader = chain_reader_decoder(&Zstd, reader)?; + crate::archive::tar::list_archive(reader)? 
+ } + Zip => { + eprintln!("Listing files from zip archive."); + eprintln!("Warning: .zip archives with extra extensions have a downside."); + eprintln!("The only way is loading everything into the RAM while compressing, and then reading the archive contents."); + eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!"); + + let mut vec = vec![]; + io::copy(&mut reader, &mut vec)?; + let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?; + + crate::archive::zip::list_archive(zip_archive)? + } + Gzip | Bzip | Lzma | Zstd => { + panic!("Not an archive! This should never happen, if it does, something is wrong with `CompressionFormat::is_archive()`. Please report this error!"); + } + }; + list::list_files(files, list_options); + Ok(()) +} diff --git a/src/extension.rs b/src/extension.rs index 9ff200f..956378a 100644 --- a/src/extension.rs +++ b/src/extension.rs @@ -45,6 +45,11 @@ impl fmt::Display for CompressionFormat { ) } } +impl CompressionFormat { + pub fn is_archive(&self) -> bool { + matches!(self, Tar | Tgz | Tbz | Tlzma | Tzst | Zip) + } +} pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec) { // // TODO: check for file names with the name of an extension diff --git a/src/lib.rs b/src/lib.rs index c6f5a07..9562ea2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ mod cli; mod dialogs; mod error; mod extension; +mod list; mod macros; mod opts; mod utils; diff --git a/src/list.rs b/src/list.rs new file mode 100644 index 0000000..f75f359 --- /dev/null +++ b/src/list.rs @@ -0,0 +1,28 @@ +//! 
Implementation of the 'list' command, print list of files in an archive + +use std::path::PathBuf; + +/// Options controlling how archive contents should be listed +#[derive(Debug, Clone, Copy)] +pub struct ListOptions { + /// Whether to show a tree view + pub tree: bool, +} + +/// Represents a single file in an archive, used in `list::list_files()` +#[derive(Debug, Clone)] +pub struct FileInArchive { + /// The file path + pub path: PathBuf, +} + +/// Actually print the files +pub fn list_files(files: Vec, list_options: ListOptions) { + if list_options.tree { + todo!("Implement tree view"); + } else { + for file in files { + println!("{}", file.path.display()); + } + } +} diff --git a/src/opts.rs b/src/opts.rs index 24bc299..447cb40 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -41,4 +41,15 @@ pub enum Subcommand { #[clap(short, long = "dir", value_hint = ValueHint::DirPath)] output_dir: Option, }, + /// List contents. Alias: l + #[clap(alias = "l")] + List { + /// Archives whose contents should be listed + #[clap(required = true, min_values = 1)] + archives: Vec, + + /// Show archive contents as a tree + #[clap(short, long)] + tree: bool, + }, }