From 9e41712f44cadfdbc8c5e687e1a54a81d0efd29b Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Mon, 15 Nov 2021 11:27:22 +0100 Subject: [PATCH 1/6] Smart unpack archives --- Cargo.toml | 2 +- src/commands.rs | 85 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eb90828..5278117 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ walkdir = "2.3.2" xz2 = "0.1.6" zip = { version = "0.5.13", default-features = false } zstd = { version = "0.9.0", default-features = false } +tempfile = "3.2.0" [build-dependencies] clap = "=3.0.0-beta.5" @@ -38,7 +39,6 @@ infer = "0.5.0" parse-display = "0.5.3" proptest = "1.0.0" rand = { version = "0.8.3", default-features = false, features = ["small_rng", "std"] } -tempfile = "3.2.0" test-strategy = "0.1.2" [features] diff --git a/src/commands.rs b/src/commands.rs index 8576a8c..03f5aa7 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -355,7 +355,16 @@ fn decompress_file( // Any other Zip decompression done can take up the whole RAM and freeze ouch. if formats.len() == 1 && *formats[0].compression_formats == [Zip] { let zip_archive = zip::ZipArchive::new(reader)?; - let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?; + let _files = if let ControlFlow::Continue(files) = smart_unpack( + Box::new(move |output_dir| crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)), + output_dir, + &output_file_path, + question_policy, + )? { + files + } else { + return Ok(()); + }; info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir)); return Ok(()); } @@ -397,7 +406,16 @@ fn decompress_file( files_unpacked = vec![output_file_path]; } Tar => { - files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?; + files_unpacked = if let ControlFlow::Continue(files) = smart_unpack( + Box::new(move |output_dir| crate::archive::tar::unpack_archive(reader, output_dir, question_policy)), + output_dir, + &output_file_path, + question_policy, + )? { + files + } else { + return Ok(()); + }; } Zip => { eprintln!("Compressing first into .zip."); @@ -411,7 +429,18 @@ fn decompress_file( io::copy(&mut reader, &mut vec)?; let zip_archive = zip::ZipArchive::new(io::Cursor::new(vec))?; - files_unpacked = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?; + files_unpacked = if let ControlFlow::Continue(files) = smart_unpack( + Box::new(move |output_dir| { + crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy) + }), + output_dir, + &output_file_path, + question_policy, + )? { + files + } else { + return Ok(()); + }; } } @@ -487,6 +516,56 @@ fn list_archive_contents( Ok(()) } +/// Unpacks an archive with some heuristics +/// - If the archive contains only one file, it will be extracted to the `output_dir` +/// - If the archive contains multiple files, it will be extracted to a subdirectory of the output_dir named after the archive (given by `output_file_path`) +fn smart_unpack( + unpack_fn: Box crate::Result>>, + output_dir: &Path, + output_file_path: &Path, + question_policy: QuestionPolicy, +) -> crate::Result>> { + let temp_dir = tempfile::tempdir_in(output_dir)?; + let temp_dir_path = temp_dir.path(); + info!("Created temporary directory {} to hold decompressed elements.", nice_directory_display(temp_dir_path)); + + // unpack the files + let files = unpack_fn(temp_dir_path)?; + + let root_contains_only_one_element = fs::read_dir(&temp_dir_path)?.count() == 1; + if root_contains_only_one_element { + // Only one file in the root directory, so we can just move it to the output directory + let file = fs::read_dir(&temp_dir_path)?.next().unwrap().unwrap(); + let file_path = file.path(); + let file_name = file_path.file_name().unwrap().to_str().unwrap(); + let correct_path = output_dir.join(file_name); + // One case to handle tough is we need to check if a file with the same name already exists + if !utils::clear_path(&correct_path, question_policy)? { + return Ok(ControlFlow::Break(())); + } + fs::rename(&file_path, &correct_path)?; + info!( + "Successfully moved {} to {}.", + nice_directory_display(&file_path), + nice_directory_display(&correct_path) + ); + } else { + // Multiple files in the root directory, so: + // Rename the temporary directory to the archive name, which is output_file_path + // One case to handle tough is we need to check if a file with the same name already exists + if !utils::clear_path(output_file_path, question_policy)? { + return Ok(ControlFlow::Break(())); + } + fs::rename(&temp_dir_path, &output_file_path)?; + info!( + "Successfully moved {} to {}.", + nice_directory_display(&temp_dir_path), + nice_directory_display(&output_file_path) + ); + } + Ok(ControlFlow::Continue(files)) +} + fn check_mime_type( files: &[PathBuf], formats: &mut Vec>, From c0b37c117cf988bb8e74f933326f72459c979f92 Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Mon, 15 Nov 2021 11:37:32 +0100 Subject: [PATCH 2/6] Add comment --- src/commands.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/commands.rs b/src/commands.rs index 03f5aa7..dfb9bbb 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -519,6 +519,7 @@ fn list_archive_contents( /// Unpacks an archive with some heuristics /// - If the archive contains only one file, it will be extracted to the `output_dir` /// - If the archive contains multiple files, it will be extracted to a subdirectory of the output_dir named after the archive (given by `output_file_path`) +/// Note: This functions assumes that `output_dir` exists fn smart_unpack( unpack_fn: Box crate::Result>>, output_dir: &Path, From 1c52dc5ee41d4da31adcced758ffd223ae69345c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Marcos=20Bezerra?= Date: Mon, 15 Nov 2021 13:05:46 -0300 Subject: [PATCH 3/6] Assert that output_dir exists --- src/commands.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/commands.rs b/src/commands.rs index dfb9bbb..4605f02 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -526,6 +526,7 @@ fn smart_unpack( output_file_path: &Path, question_policy: QuestionPolicy, ) -> crate::Result>> { + assert!(output_dir.exists()); let temp_dir = tempfile::tempdir_in(output_dir)?; let temp_dir_path = temp_dir.path(); info!("Created temporary directory {} to hold decompressed elements.", nice_directory_display(temp_dir_path)); From 4fc49b63cc4f5848322a55cde25a543d9a90e32d Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Tue, 16 Nov 2021 09:23:40 +0100 Subject: [PATCH 4/6] Assert that output_dir exists in decompress_fn as well --- src/commands.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/commands.rs b/src/commands.rs index 4605f02..17f6f7b 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -345,6 +345,7 @@ fn decompress_file( output_file_path: PathBuf, question_policy: QuestionPolicy, ) -> crate::Result<()> { + assert!(output_dir.exists()); let reader = fs::File::open(&input_file_path)?; // Zip archives are special, because they require io::Seek, so it requires it's logic separated // from decoder chaining. From a87b89073e1dd251d49508559f6965f9c7f4adec Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Tue, 16 Nov 2021 09:32:01 +0100 Subject: [PATCH 5/6] Document unwrap safety --- src/commands.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/commands.rs b/src/commands.rs index 17f6f7b..e23f02c 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -538,9 +538,10 @@ fn smart_unpack( let root_contains_only_one_element = fs::read_dir(&temp_dir_path)?.count() == 1; if root_contains_only_one_element { // Only one file in the root directory, so we can just move it to the output directory - let file = fs::read_dir(&temp_dir_path)?.next().unwrap().unwrap(); + let file = fs::read_dir(&temp_dir_path)?.next().expect("item exists")?; let file_path = file.path(); - let file_name = file_path.file_name().unwrap().to_str().unwrap(); + let file_name = + file_path.file_name().expect("Should be safe because paths in archives should not end with '..'"); let correct_path = output_dir.join(file_name); // One case to handle tough is we need to check if a file with the same name already exists if !utils::clear_path(&correct_path, question_policy)? { From ba617fdea8e1695262ca0e7d926173f9430e8340 Mon Sep 17 00:00:00 2001 From: Nbiba Bedis Date: Tue, 16 Nov 2021 09:39:10 +0100 Subject: [PATCH 6/6] Make logging consistant for zip --- src/commands.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/commands.rs b/src/commands.rs index e23f02c..35b6dbc 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -356,7 +356,7 @@ fn decompress_file( // Any other Zip decompression done can take up the whole RAM and freeze ouch. if formats.len() == 1 && *formats[0].compression_formats == [Zip] { let zip_archive = zip::ZipArchive::new(reader)?; - let _files = if let ControlFlow::Continue(files) = smart_unpack( + let files = if let ControlFlow::Continue(files) = smart_unpack( Box::new(move |output_dir| crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)), output_dir, &output_file_path, @@ -367,6 +367,7 @@ fn decompress_file( return Ok(()); }; info!("Successfully decompressed archive in {}.", nice_directory_display(output_dir)); + info!("Files unpacked: {}", files.len()); return Ok(()); }