diff --git a/Cargo.lock b/Cargo.lock index ef911d1..e661588 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + [[package]] name = "lzma-sys" version = "0.1.17" @@ -303,6 +309,7 @@ dependencies = [ "fs-err", "infer", "libc", + "linked-hash-map", "once_cell", "rand", "tar", diff --git a/Cargo.toml b/Cargo.toml index 298b454..ea7c98a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ xz2 = "0.1.6" zip = { version = "0.5.13", default-features = false, features = ["deflate-miniz"] } flate2 = { version = "1.0.22", default-features = false, features = ["zlib"] } zstd = { version = "0.9.0", default-features = false, features = ["thin"] } +linked-hash-map = "0.5.4" [build-dependencies] clap = "=3.0.0-beta.5" diff --git a/src/archive/tar.rs b/src/archive/tar.rs index be5cefc..e0265fd 100644 --- a/src/archive/tar.rs +++ b/src/archive/tar.rs @@ -13,6 +13,7 @@ use walkdir::WalkDir; use crate::{ error::FinalError, info, + list::FileInArchive, utils::{self, Bytes}, QuestionPolicy, }; @@ -51,6 +52,23 @@ pub fn unpack_archive( Ok(files_unpacked) } +/// List contents of `archive`, returning a vector of archive entries +pub fn list_archive(reader: Box) -> crate::Result> { + let mut archive = tar::Archive::new(reader); + + let mut files = vec![]; + for file in archive.entries()? { + let file = file?; + + let path = file.path()?.into_owned(); + let is_dir = file.header().entry_type().is_dir(); + + files.push(FileInArchive { path, is_dir }); + } + + Ok(files) +} + /// Compresses the archives given by `input_filenames` into the file given previously to `writer`. pub fn build_archive_from_paths(input_filenames: &[PathBuf], writer: W) -> crate::Result where diff --git a/src/archive/zip.rs b/src/archive/zip.rs index 9085129..50ec853 100644 --- a/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -13,6 +13,7 @@ use zip::{self, read::ZipFile, ZipArchive}; use crate::{ info, + list::FileInArchive, utils::{self, dir_is_empty, strip_cur_dir, Bytes}, QuestionPolicy, }; @@ -80,6 +81,26 @@ where Ok(unpacked_files) } +/// List contents of `archive`, returning a vector of archive entries +pub fn list_archive(mut archive: ZipArchive) -> crate::Result> +where + R: Read + Seek, +{ + let mut files = vec![]; + for idx in 0..archive.len() { + let file = archive.by_index(idx)?; + + let path = match file.enclosed_name() { + Some(path) => path.to_owned(), + None => continue, + }; + let is_dir = file.is_dir(); + + files.push(FileInArchive { path, is_dir }); + } + Ok(files) +} + /// Compresses the archives given by `input_filenames` into the file given previously to `writer`. pub fn build_archive_from_paths(input_filenames: &[PathBuf], writer: W) -> crate::Result where diff --git a/src/cli.rs b/src/cli.rs index 6d7b4a9..677471f 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -20,7 +20,9 @@ impl Opts { pub fn parse_args() -> crate::Result<(Self, QuestionPolicy)> { let mut opts = Self::parse(); - let (Subcommand::Compress { files, .. } | Subcommand::Decompress { files, .. }) = &mut opts.cmd; + let (Subcommand::Compress { files, .. } + | Subcommand::Decompress { files, .. } + | Subcommand::List { archives: files, .. }) = &mut opts.cmd; *files = canonicalize_files(files)?; let skip_questions_positively = if opts.yes { diff --git a/src/commands.rs b/src/commands.rs index bef9a8a..c4318d9 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -19,10 +19,24 @@ use crate::{ Extension, }, info, + list::{self, ListOptions}, utils::{self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf}, Opts, QuestionPolicy, Subcommand, }; +// use crate::{ +// archive, +// error::FinalError, +// extension::{ +// self, +// CompressionFormat::{self, *}, +// Extension, +// }, +// info, +// utils::{self, concatenate_list_of_os_str, dir_is_empty, nice_directory_display, to_utf}, +// Opts, QuestionPolicy, Subcommand, +// }; + // Used in BufReader and BufWriter to perform less syscalls const BUFFER_CAPACITY: usize = 1024 * 64; @@ -199,6 +213,44 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> { decompress_file(input_path, formats, output_dir, file_name, question_policy)?; } } + Subcommand::List { archives: files, tree } => { + let mut formats = vec![]; + + for path in files.iter() { + let (_, file_formats) = extension::separate_known_extensions_from_name(path); + formats.push(file_formats); + } + + let not_archives: Vec = files + .iter() + .zip(&formats) + .filter(|(_, formats)| !formats.get(0).map(Extension::is_archive).unwrap_or(false)) + .map(|(path, _)| path.clone()) + .collect(); + + // Error + if !not_archives.is_empty() { + eprintln!("Some file you asked ouch to list the contents of is not an archive."); + for file in ¬_archives { + eprintln!("Could not list {}.", to_utf(file)); + } + todo!( + "Dev note: add this error variant and pass the Vec to it, all the files \ + lacking extension shall be shown: {:#?}.", + not_archives + ); + } + + let list_options = ListOptions { tree }; + + for (i, (archive_path, formats)) in files.iter().zip(formats).enumerate() { + if i > 0 { + println!(); + } + let formats = formats.iter().flat_map(Extension::iter).map(Clone::clone).collect(); + list_archive_contents(archive_path, formats, list_options)?; + } + } } Ok(()) } @@ -365,3 +417,69 @@ fn decompress_file( Ok(()) } + +// File at input_file_path is opened for reading, example: "archive.tar.gz" +// formats contains each format necessary for decompression, example: [Gz, Tar] (in decompression order) +fn list_archive_contents( + archive_path: &Path, + formats: Vec, + list_options: ListOptions, +) -> crate::Result<()> { + // TODO: improve error message + let reader = fs::File::open(&archive_path)?; + + // Zip archives are special, because they require io::Seek, so it requires it's logic separated + // from decoder chaining. + // + // This is the only case where we can read and unpack it directly, without having to do + // in-memory decompression/copying first. + // + // Any other Zip decompression done can take up the whole RAM and freeze ouch. + if let [Zip] = *formats.as_slice() { + let zip_archive = zip::ZipArchive::new(reader)?; + let files = crate::archive::zip::list_archive(zip_archive)?; + list::list_files(archive_path, files, list_options); + return Ok(()); + } + + // Will be used in decoder chaining + let reader = BufReader::with_capacity(BUFFER_CAPACITY, reader); + let mut reader: Box = Box::new(reader); + + // Grab previous decoder and wrap it inside of a new one + let chain_reader_decoder = |format: &CompressionFormat, decoder: Box| -> crate::Result> { + let decoder: Box = match format { + Gzip => Box::new(flate2::read::GzDecoder::new(decoder)), + Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)), + Lzma => Box::new(xz2::read::XzDecoder::new(decoder)), + Zstd => Box::new(zstd::stream::Decoder::new(decoder)?), + _ => unreachable!(), + }; + Ok(decoder) + }; + + for format in formats.iter().skip(1).rev() { + reader = chain_reader_decoder(format, reader)?; + } + + let files = match formats[0] { + Tar => crate::archive::tar::list_archive(reader)?, + Zip => { + eprintln!("Listing files from zip archive."); + eprintln!("Warning: .zip archives with extra extensions have a downside."); + eprintln!("The only way is loading everything into the RAM while compressing, and then reading the archive contents."); + eprintln!("this means that by compressing .zip with extra compression formats, you can run out of RAM if the file is too large!"); + + let mut vec = vec![]; + io::copy(&mut reader, &mut vec)?; + let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?; + + crate::archive::zip::list_archive(zip_archive)? + } + Gzip | Bzip | Lzma | Zstd => { + panic!("Not an archive! This should never happen, if it does, something is wrong with `CompressionFormat::is_archive()`. Please report this error!"); + } + }; + list::list_files(archive_path, files, list_options); + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 71c51fe..137775e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ pub mod commands; pub mod dialogs; pub mod error; pub mod extension; +pub mod list; pub mod utils; /// CLI argparsing definitions, using `clap`. diff --git a/src/list.rs b/src/list.rs new file mode 100644 index 0000000..23af652 --- /dev/null +++ b/src/list.rs @@ -0,0 +1,174 @@ +//! Implementation of the 'list' command, print list of files in an archive + +use self::tree::Tree; +use std::path::{Path, PathBuf}; + +/// Options controlling how archive contents should be listed +#[derive(Debug, Clone, Copy)] +pub struct ListOptions { + /// Whether to show a tree view + pub tree: bool, +} + +/// Represents a single file in an archive, used in `list::list_files()` +#[derive(Debug, Clone)] +pub struct FileInArchive { + /// The file path + pub path: PathBuf, + + /// Whether this file is a directory + pub is_dir: bool, +} + +/// Actually print the files +pub fn list_files(archive: &Path, files: Vec, list_options: ListOptions) { + println!("{}:", archive.display()); + if list_options.tree { + let tree: Tree = files.into_iter().collect(); + tree.print(); + } else { + for FileInArchive { path, is_dir } in files { + print_entry(path.display(), is_dir); + } + } +} + +/// Print an entry and highlight directories, either by coloring them +/// if that's supported or by adding a trailing / +fn print_entry(name: impl std::fmt::Display, is_dir: bool) { + use crate::utils::colors::*; + + if is_dir { + // if colors are deactivated, print final / to mark directories + if BLUE.is_empty() { + println!("{}/", name); + } else { + println!("{}{}{}{}", *BLUE, *STYLE_BOLD, name, *ALL_RESET); + } + } else { + // not a dir -> just print the file name + println!("{}", name); + } +} + +/// Since archives store files as a list of entries -> without direct +/// directory structure (the directories are however part of the name), +/// we have to construct the tree structure ourselves to be able to +/// display them as a tree +mod tree { + use super::FileInArchive; + use linked_hash_map::LinkedHashMap; + use std::ffi::OsString; + use std::iter::FromIterator; + use std::path; + + /// Directory tree + #[derive(Debug, Default)] + pub struct Tree { + file: Option, + children: LinkedHashMap, + } + impl Tree { + /// Insert a file into the tree + pub fn insert(&mut self, file: FileInArchive) { + self.insert_(file.clone(), file.path.iter()); + } + /// Insert file by traversing the tree recursively + fn insert_(&mut self, file: FileInArchive, mut path: path::Iter) { + // Are there more components in the path? -> traverse tree further + if let Some(part) = path.next() { + // Either insert into an existing child node or create a new one + if let Some(t) = self.children.get_mut(part) { + t.insert_(file, path) + } else { + let mut child = Tree::default(); + child.insert_(file, path); + self.children.insert(part.to_os_string(), child); + } + } else { + // `path` was empty -> we reached our destination and can insert + // `file`, assuming there is no file already there (which meant + // there were 2 files with the same name in the same directory + // which should be impossible in any sane file system) + match &self.file { + None => self.file = Some(file), + Some(file) => { + eprintln!( + "[warning] multiple files with the same name in a single directory ({})", + file.path.display() + ) + } + } + } + } + + /// Print the file tree using Unicode line characters + pub fn print(&self) { + for (i, (name, subtree)) in self.children.iter().enumerate() { + subtree.print_(name, String::new(), i == self.children.len() - 1); + } + } + /// Print the tree by traversing it recursively + fn print_(&self, name: &OsString, mut prefix: String, last: bool) { + // Convert `name` to valid unicode + let name = name.to_string_lossy(); + + // If there are no further elements in the parent directory, add + // "└── " to the prefix, otherwise add "├── " + let final_part = match last { + true => draw::FINAL_LAST, + false => draw::FINAL_BRANCH, + }; + + print!("{}{}", prefix, final_part); + let is_dir = match self.file { + Some(FileInArchive { is_dir, .. }) => is_dir, + None => true, + }; + super::print_entry(name, is_dir); + + // Construct prefix for children, adding either a line if this isn't + // the last entry in the parent dir or empty space if it is. + prefix.push_str(match last { + true => draw::PREFIX_EMPTY, + false => draw::PREFIX_LINE, + }); + // Recursively print all children + for (i, (name, subtree)) in self.children.iter().enumerate() { + subtree.print_(name, prefix.clone(), i == self.children.len() - 1); + } + } + } + + impl FromIterator for Tree { + fn from_iter>(iter: I) -> Self { + let mut tree = Self::default(); + for file in iter { + tree.insert(file); + } + tree + } + } + + /// Constants containing the visual parts of which the displayed tree + /// is constructed. + /// + /// They fall into 2 categories: the `PREFIX_*` parts form the first + /// `depth - 1` parts while the `FINAL_*` parts form the last part, + /// right before the entry itself + /// + /// `PREFIX_EMPTY`: the corresponding dir is the last entry in its parent dir + /// `PREFIX_LINE`: there are other entries after the corresponding dir + /// `FINAL_LAST`: this entry is the last entry in its parent dir + /// `FINAL_BRANCH`: there are other entries after this entry + mod draw { + /// the corresponding dir is the last entry in its parent dir + pub const PREFIX_EMPTY: &str = " "; + /// there are other entries after the corresponding dir + pub const PREFIX_LINE: &str = "│ "; + /// this entry is the last entry in its parent dir + pub const FINAL_LAST: &str = "└── "; + /// there are other entries after this entry + pub const FINAL_BRANCH: &str = "├── "; + } +} diff --git a/src/opts.rs b/src/opts.rs index 14d6997..553d033 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -54,4 +54,15 @@ pub enum Subcommand { #[clap(short, long = "dir", value_hint = ValueHint::DirPath)] output_dir: Option, }, + /// List contents. Alias: l + #[clap(alias = "l")] + List { + /// Archives whose contents should be listed + #[clap(required = true, min_values = 1)] + archives: Vec, + + /// Show archive contents as a tree + #[clap(short, long)] + tree: bool, + }, } diff --git a/src/utils.rs b/src/utils.rs index c139eff..17b47fb 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -147,6 +147,9 @@ pub mod colors { color!(RED = "\u{1b}[38;5;9m"); color!(WHITE = "\u{1b}[38;5;15m"); color!(YELLOW = "\u{1b}[38;5;11m"); + color!(STYLE_BOLD = "\u{1b}[1m"); + color!(STYLE_RESET = "\u{1b}[0m"); + color!(ALL_RESET = "\u{1b}[0;39m"); } /// Struct useful to printing bytes as kB, MB, GB, etc.