multi-threaded gzip compression using gzp

This commit is contained in:
figsoda 2023-01-30 19:10:12 -05:00
parent 118a334c86
commit 5abdd5fc96
3 changed files with 212 additions and 10 deletions

199
Cargo.lock generated
View File

@ -81,12 +81,24 @@ dependencies = [
"serde",
]
[[package]]
name = "bumpalo"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
[[package]]
name = "bzip2"
version = "0.4.4"
@ -190,6 +202,17 @@ dependencies = [
"roff",
]
[[package]]
name = "core_affinity"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4436406e93f52cce33bfba4be067a9f7229da44a634c385e4b22cdfaca5f84cc"
dependencies = [
"libc",
"num_cpus",
"winapi",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
@ -313,6 +336,19 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "flume"
version = "0.10.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577"
dependencies = [
"futures-core",
"futures-sink",
"nanorand",
"pin-project",
"spin",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -325,6 +361,18 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0845fa252299212f0389d64ba26f34fa32cfe41588355f21ed507c59a0f64541"
[[package]]
name = "futures-core"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608"
[[package]]
name = "futures-sink"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364"
[[package]]
name = "getrandom"
version = "0.2.8"
@ -332,8 +380,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi",
"wasm-bindgen",
]
[[package]]
@ -349,6 +399,22 @@ dependencies = [
"regex",
]
[[package]]
name = "gzp"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c65d1899521a11810501b50b898464d133e1afc96703cff57726964cfa7baf"
dependencies = [
"byteorder",
"bytes",
"core_affinity",
"flate2",
"flume",
"libz-sys",
"num_cpus",
"thiserror",
]
[[package]]
name = "heck"
version = "0.4.0"
@ -463,6 +529,15 @@ dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -482,6 +557,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
@ -498,6 +574,16 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
[[package]]
name = "lock_api"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
@ -551,6 +637,15 @@ dependencies = [
"adler",
]
[[package]]
name = "nanorand"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
dependencies = [
"getrandom",
]
[[package]]
name = "num-traits"
version = "0.2.15"
@ -596,6 +691,7 @@ dependencies = [
"filetime",
"flate2",
"fs-err",
"gzp",
"ignore",
"infer",
"is_executable",
@ -646,6 +742,26 @@ dependencies = [
"syn",
]
[[package]]
name = "pin-project"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pkg-config"
version = "0.3.26"
@ -920,6 +1036,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831"
[[package]]
name = "spin"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09"
dependencies = [
"lock_api",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -1012,6 +1137,26 @@ dependencies = [
"syn",
]
[[package]]
name = "thiserror"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.4"
@ -1104,6 +1249,60 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"

View File

@ -18,6 +18,7 @@ clap = { version = "4.1.4", features = ["derive", "env"] }
filetime = "0.2.19"
flate2 = { version = "1.0.25", default-features = false }
fs-err = "2.9.0"
gzp = { version = "0.11.3", default-features = false }
ignore = "0.4.20"
libc = "0.2.139"
linked-hash-map = "0.5.6"
@ -56,7 +57,7 @@ rand = { version = "0.8.5", default-features = false, features = ["small_rng", "
test-strategy = "0.3.0"
[features]
default = ["flate2/zlib", "zip/deflate-zlib", "zstd/thin"]
default = ["flate2/zlib", "gzp/deflate_zlib", "zip/deflate-zlib", "zstd/thin"]
[profile.release]
lto = true

View File

@ -8,11 +8,7 @@ use fs_err as fs;
use crate::{
archive,
commands::warn_user_about_loading_zip_in_memory,
extension::{
split_first_compression_format,
CompressionFormat::{self, *},
Extension,
},
extension::{split_first_compression_format, CompressionFormat::*, Extension},
utils::{user_wants_to_continue, FileVisibilityPolicy},
QuestionAction, QuestionPolicy, BUFFER_CAPACITY,
};
@ -39,12 +35,18 @@ pub fn compress_files(
// If the input files contain a directory, then the total size will be underestimated
let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);
let mut writer: Box<dyn Write> = Box::new(file_writer);
let mut writer: Box<dyn Send + Write> = Box::new(file_writer);
// Grab previous encoder and wrap it inside of a new one
let chain_writer_encoder = |format: &CompressionFormat, encoder: Box<dyn Write>| -> crate::Result<Box<dyn Write>> {
let encoder: Box<dyn Write> = match format {
Gzip => Box::new(flate2::write::GzEncoder::new(encoder, Default::default())),
let chain_writer_encoder = |format: &_, encoder| -> crate::Result<_> {
let encoder: Box<dyn Send + Write> = match format {
Gzip => Box::new(
// ParCompress's builder defaults to compression level 3 rather than the
// regular default that flate2 uses, so the level is set explicitly here
gzp::par::compress::ParCompress::<gzp::deflate::Gzip>::builder()
.compression_level(Default::default())
.from_writer(encoder),
),
Bzip => Box::new(bzip2::write::BzEncoder::new(encoder, Default::default())),
Lz4 => Box::new(lzzzz::lz4f::WriteCompressor::new(encoder, Default::default())?),
Lzma => Box::new(xz2::write::XzEncoder::new(encoder, 6)),