Merge pull request #133 from SpyrosRoum/Remove-tar-combinations-from-CompressionFormat

Remove tar combinations from compression format
This commit is contained in:
João Marcos Bezerra 2021-11-02 01:20:41 -03:00 committed by GitHub
commit ebe3918478
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 110 additions and 100 deletions

View File

@ -16,6 +16,7 @@ use crate::{
extension::{ extension::{
self, self,
CompressionFormat::{self, *}, CompressionFormat::{self, *},
Extension,
}, },
info, info,
utils::{self, dir_is_empty, nice_directory_display, to_utf}, utils::{self, dir_is_empty, nice_directory_display, to_utf},
@ -55,9 +56,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
return Err(Error::with_reason(reason)); return Err(Error::with_reason(reason));
} }
if !formats.get(0).map(CompressionFormat::is_archive_format).unwrap_or(false) if !formats.get(0).map(Extension::is_archive).unwrap_or(false) && represents_several_files(&files) {
&& represents_several_files(&files)
{
// This piece of code creates a suggestion for compressing multiple files // This piece of code creates a suggestion for compressing multiple files
// It says: // It says:
// Change from file.bz.xz // Change from file.bz.xz
@ -85,7 +84,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
return Err(Error::with_reason(reason)); return Err(Error::with_reason(reason));
} }
if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive_format()) { if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive()) {
let reason = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path))) let reason = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
.detail(format!("Found the format '{}' in an incorrect position.", format)) .detail(format!("Found the format '{}' in an incorrect position.", format))
.detail(format!("'{}' can only be used at the start of the file extension.", format)) .detail(format!("'{}' can only be used at the start of the file extension.", format))
@ -107,12 +106,28 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
// `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz` // `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz`
let input_extensions = extension::extensions_from_path(&files[0]); let input_extensions = extension::extensions_from_path(&files[0]);
// We calculate the formats that are left if we filter out a sublist at the start of what we have that's the same as the input formats
let mut new_formats = Vec::with_capacity(formats.len());
for (inp_ext, out_ext) in input_extensions.iter().zip(&formats) {
if inp_ext.compression_formats == out_ext.compression_formats {
new_formats.push(out_ext.clone());
} else if inp_ext
.compression_formats
.iter()
.zip(&out_ext.compression_formats)
.all(|(inp, out)| inp == out)
{
let new_ext = Extension::new(
&out_ext.compression_formats[..inp_ext.compression_formats.len()],
&out_ext.display_text,
);
new_formats.push(new_ext);
break;
}
}
// If the input is a sublist at the start of `formats` then remove the extensions // If the input is a sublist at the start of `formats` then remove the extensions
// Note: If input_extensions is empty this counts as true // Note: If input_extensions is empty then it will make `formats` empty too, which we don't want
if !input_extensions.is_empty() if !input_extensions.is_empty() && new_formats != formats {
&& input_extensions.len() < formats.len()
&& input_extensions.iter().zip(&formats).all(|(inp, out)| inp == out)
{
// Safety: // Safety:
// We checked above that input_extensions isn't empty, so files[0] has a extension. // We checked above that input_extensions isn't empty, so files[0] has a extension.
// //
@ -123,8 +138,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
to_utf(files[0].as_path().file_name().unwrap()), to_utf(files[0].as_path().file_name().unwrap()),
to_utf(&output_path) to_utf(&output_path)
); );
let drain_iter = formats.drain(..input_extensions.len()); formats = new_formats;
drop(drain_iter); // Remove the extensions from `formats`
} }
} }
let compress_result = compress_files(files, formats, output_file); let compress_result = compress_files(files, formats, output_file);
@ -189,7 +203,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
// files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"] // files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"]
// formats contains each format necessary for compression, example: [Tar, Gz] (in compression order) // formats contains each format necessary for compression, example: [Tar, Gz] (in compression order)
// output_file is the resulting compressed file name, example: "compressed.tar.gz" // output_file is the resulting compressed file name, example: "compressed.tar.gz"
fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_file: fs::File) -> crate::Result<()> { fn compress_files(files: Vec<PathBuf>, formats: Vec<Extension>, output_file: fs::File) -> crate::Result<()> {
let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file); let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);
let mut writer: Box<dyn Write> = Box::new(file_writer); let mut writer: Box<dyn Write> = Box::new(file_writer);
@ -212,13 +226,13 @@ fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_f
encoder encoder
}; };
for format in formats.iter().skip(1).rev() { for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
writer = chain_writer_encoder(format, writer); writer = chain_writer_encoder(format, writer);
} }
match formats[0] { match formats[0].compression_formats[0] {
Gzip | Bzip | Lzma | Zstd => { Gzip | Bzip | Lzma | Zstd => {
writer = chain_writer_encoder(&formats[0], writer); writer = chain_writer_encoder(&formats[0].compression_formats[0], writer);
let mut reader = fs::File::open(&files[0]).unwrap(); let mut reader = fs::File::open(&files[0]).unwrap();
io::copy(&mut reader, &mut writer)?; io::copy(&mut reader, &mut writer)?;
} }
@ -226,26 +240,6 @@ fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_f
let mut writer = archive::tar::build_archive_from_paths(&files, writer)?; let mut writer = archive::tar::build_archive_from_paths(&files, writer)?;
writer.flush()?; writer.flush()?;
} }
Tgz => {
let encoder = flate2::write::GzEncoder::new(writer, Default::default());
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tbz => {
let encoder = bzip2::write::BzEncoder::new(writer, Default::default());
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tlzma => {
let encoder = xz2::write::XzEncoder::new(writer, 6);
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tzst => {
let encoder = zstd::stream::write::Encoder::new(writer, Default::default())?;
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Zip => { Zip => {
eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET); eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET);
eprintln!("\tCompressing .zip entirely in memory."); eprintln!("\tCompressing .zip entirely in memory.");
@ -274,7 +268,7 @@ fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_f
// file_name is only used when extracting single file formats, no archive formats like .tar or .zip // file_name is only used when extracting single file formats, no archive formats like .tar or .zip
fn decompress_file( fn decompress_file(
input_file_path: &Path, input_file_path: &Path,
formats: Vec<extension::CompressionFormat>, formats: Vec<Extension>,
output_dir: Option<&Path>, output_dir: Option<&Path>,
file_name: &Path, file_name: &Path,
question_policy: QuestionPolicy, question_policy: QuestionPolicy,
@ -296,7 +290,7 @@ fn decompress_file(
// in-memory decompression/copying first. // in-memory decompression/copying first.
// //
// Any other Zip decompression done can take up the whole RAM and freeze ouch. // Any other Zip decompression done can take up the whole RAM and freeze ouch.
if let [Zip] = *formats.as_slice() { if formats.len() == 1 && *formats[0].compression_formats.as_slice() == [Zip] {
utils::create_dir_if_non_existent(output_dir)?; utils::create_dir_if_non_existent(output_dir)?;
let zip_archive = zip::ZipArchive::new(reader)?; let zip_archive = zip::ZipArchive::new(reader)?;
let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?; let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
@ -320,7 +314,7 @@ fn decompress_file(
Ok(decoder) Ok(decoder)
}; };
for format in formats.iter().skip(1).rev() { for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
reader = chain_reader_decoder(format, reader)?; reader = chain_reader_decoder(format, reader)?;
} }
@ -328,9 +322,9 @@ fn decompress_file(
let files_unpacked; let files_unpacked;
match formats[0] { match formats[0].compression_formats[0] {
Gzip | Bzip | Lzma | Zstd => { Gzip | Bzip | Lzma | Zstd => {
reader = chain_reader_decoder(&formats[0], reader)?; reader = chain_reader_decoder(&formats[0].compression_formats[0], reader)?;
// TODO: improve error treatment // TODO: improve error treatment
let mut writer = fs::File::create(&output_path)?; let mut writer = fs::File::create(&output_path)?;
@ -341,22 +335,6 @@ fn decompress_file(
Tar => { Tar => {
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?; files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
} }
Tgz => {
let reader = chain_reader_decoder(&Gzip, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tbz => {
let reader = chain_reader_decoder(&Bzip, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tlzma => {
let reader = chain_reader_decoder(&Lzma, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tzst => {
let reader = chain_reader_decoder(&Zstd, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Zip => { Zip => {
eprintln!("Compressing first into .zip."); eprintln!("Compressing first into .zip.");
eprintln!("Warning: .zip archives with extra extensions have a downside."); eprintln!("Warning: .zip archives with extra extensions have a downside.");

View File

@ -4,26 +4,62 @@ use std::{ffi::OsStr, fmt, path::Path};
use self::CompressionFormat::*; use self::CompressionFormat::*;
/// A wrapper around `CompressionFormat` that allows combinations like `tgz`
///
/// A single textual extension may stand for more than one format: for example
/// `tgz` is built from `[Tar, Gzip]`, while `gz` is built from `[Gzip]` alone.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Extension {
/// The formats this extension stands for.
/// `Extension::new` asserts that this list is not empty.
pub compression_formats: Vec<CompressionFormat>,
/// The text written out by the `Display` implementation of this type.
pub display_text: String,
}
impl Extension {
    /// Builds an `Extension` from the formats it stands for and the text used to display it.
    ///
    /// # Panics
    ///
    /// Panics if `formats` converts into an empty list.
    pub fn new(formats: impl Into<Vec<CompressionFormat>>, text: impl Into<String>) -> Self {
        let compression_formats: Vec<CompressionFormat> = formats.into();
        assert!(!compression_formats.is_empty());
        let display_text: String = text.into();
        Self { compression_formats, display_text }
    }

    /// Reports whether the first entry of `compression_formats` is an archive format.
    ///
    /// # Panics
    ///
    /// Panics if `compression_formats` is empty; values built through `Self::new` never are.
    pub fn is_archive(&self) -> bool {
        // Direct indexing relies on the non-empty check performed in `Self::new`.
        let leading = &self.compression_formats[0];
        leading.is_archive_format()
    }

    /// Iterates by reference over the stored formats, in the order they are stored.
    pub fn iter(&self) -> impl Iterator<Item = &CompressionFormat> {
        self.compression_formats.as_slice().iter()
    }
}
impl fmt::Display for Extension {
    /// Writes the stored `display_text` to the formatter unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { display_text, .. } = self;
        f.write_str(display_text)
    }
}
#[allow(missing_docs)] #[allow(missing_docs)]
#[derive(Clone, PartialEq, Eq, Debug)] #[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Accepted extensions for input and output /// Accepted extensions for input and output
pub enum CompressionFormat { pub enum CompressionFormat {
Gzip, // .gz Gzip, // .gz
Bzip, // .bz Bzip, // .bz
Lzma, // .lzma Lzma, // .lzma
Tar, // .tar (technically not a compression extension, but will do for now) Tar, // .tar (technically not a compression extension, but will do for now)
Tgz, // .tgz Zstd, // .zst
Tbz, // .tbz Zip, // .zip
Tlzma, // .tlzma
Tzst, // .tzst
Zstd, // .zst
Zip, // .zip
} }
impl CompressionFormat { impl CompressionFormat {
/// Currently supported archive formats are .tar (and aliases to it) and .zip /// Currently supported archive formats are .tar (and aliases to it) and .zip
pub fn is_archive_format(&self) -> bool { pub fn is_archive_format(&self) -> bool {
matches!(self, Tar | Tgz | Tbz | Tlzma | Tzst | Zip) // Keep this match like that without a wildcard `_` so we don't forget to update it
match self {
Tar | Zip => true,
Gzip => false,
Bzip => false,
Lzma => false,
Zstd => false,
}
} }
} }
@ -38,10 +74,6 @@ impl fmt::Display for CompressionFormat {
Zstd => ".zst", Zstd => ".zst",
Lzma => ".lz", Lzma => ".lz",
Tar => ".tar", Tar => ".tar",
Tgz => ".tgz",
Tbz => ".tbz",
Tlzma => ".tlz",
Tzst => ".tzst",
Zip => ".zip", Zip => ".zip",
} }
) )
@ -53,15 +85,7 @@ impl fmt::Display for CompressionFormat {
/// Extracts extensions from a path, /// Extracts extensions from a path,
/// return both the remaining path and the list of extension objects /// return both the remaining path and the list of extension objects
/// pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Extension>) {
/// ```rust
/// use ouch::extension::{separate_known_extensions_from_name, CompressionFormat};
/// use std::path::Path;
///
/// let mut path = Path::new("bolovo.tar.gz");
/// assert_eq!(separate_known_extensions_from_name(&path), (Path::new("bolovo"), vec![CompressionFormat::Tar, CompressionFormat::Gzip]));
/// ```
pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<CompressionFormat>) {
// // TODO: check for file names with the name of an extension // // TODO: check for file names with the name of an extension
// // TODO2: warn the user that currently .tar.gz is a .gz file named .tar // // TODO2: warn the user that currently .tar.gz is a .gz file named .tar
// //
@ -75,16 +99,16 @@ pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Compr
// While there is known extensions at the tail, grab them // While there is known extensions at the tail, grab them
while let Some(extension) = path.extension().and_then(OsStr::to_str) { while let Some(extension) = path.extension().and_then(OsStr::to_str) {
extensions.push(match extension { extensions.push(match extension {
"tar" => Tar, "tar" => Extension::new([Tar], extension),
"tgz" => Tgz, "tgz" => Extension::new([Tar, Gzip], extension),
"tbz" | "tbz2" => Tbz, "tbz" | "tbz2" => Extension::new([Tar, Bzip], extension),
"txz" | "tlz" | "tlzma" => Tlzma, "txz" | "tlz" | "tlzma" => Extension::new([Tar, Lzma], extension),
"tzst" => Tzst, "tzst" => Extension::new([Tar, Zstd], ".tzst"),
"zip" => Zip, "zip" => Extension::new([Zip], extension),
"bz" | "bz2" => Bzip, "bz" | "bz2" => Extension::new([Bzip], extension),
"gz" => Gzip, "gz" => Extension::new([Gzip], extension),
"xz" | "lzma" | "lz" => Lzma, "xz" | "lzma" | "lz" => Extension::new([Lzma], extension),
"zst" => Zstd, "zst" => Extension::new([Zstd], extension),
_ => break, _ => break,
}); });
@ -98,15 +122,23 @@ pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Compr
} }
/// Extracts extensions from a path, return only the list of extension objects /// Extracts extensions from a path, return only the list of extension objects
/// pub fn extensions_from_path(path: &Path) -> Vec<Extension> {
/// ```rust
/// use ouch::extension::{extensions_from_path, CompressionFormat};
/// use std::path::Path;
///
/// let mut path = Path::new("bolovo.tar.gz");
/// assert_eq!(extensions_from_path(&path), vec![CompressionFormat::Tar, CompressionFormat::Gzip]);
/// ```
pub fn extensions_from_path(path: &Path) -> Vec<CompressionFormat> {
let (_, extensions) = separate_known_extensions_from_name(path); let (_, extensions) = separate_known_extensions_from_name(path);
extensions extensions
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// The extensions of `bolovo.tar.gz`, flattened into individual formats,
    /// must be `Tar` followed by `Gzip`.
    #[test]
    fn test_extensions_from_path() {
        use CompressionFormat::*;

        let parsed: Vec<Extension> = extensions_from_path(Path::new("bolovo.tar.gz"));

        // Flatten every extension into the individual formats it stands for.
        let mut flattened: Vec<&CompressionFormat> = Vec::new();
        for ext in &parsed {
            flattened.extend(ext.iter());
        }

        assert_eq!(flattened, vec![&Tar, &Gzip]);
    }
}