From b9b1e11303f1d12d7120354e24f78865bdff253b Mon Sep 17 00:00:00 2001 From: tommady Date: Thu, 17 Apr 2025 04:43:46 +0800 Subject: [PATCH] Store symlinks by default and add `--follow-symlinks` to toggle it (#789) --- CHANGELOG.md | 1 + src/archive/tar.rs | 32 +++++++++++++- src/archive/zip.rs | 39 +++++++++++++++-- src/cli/args.rs | 8 ++++ src/cli/mod.rs | 2 +- src/commands/compress.rs | 11 ++++- src/commands/mod.rs | 2 + tests/integration.rs | 95 +++++++++++++++++++++++++++++++++++++++- tests/utils.rs | 2 +- 9 files changed, 184 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27643f5..ba482f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Categories Used: - Add `--remove` flag for decompression subcommand to remove files after successful decompression [\#757](https://github.com/ouch-org/ouch/pull/757) ([ttys3](https://github.com/ttys3)) - Add `br` (Brotli) support [\#765](https://github.com/ouch-org/ouch/pull/765) ([killercup](https://github.com/killercup)) - Add rename option in overwrite menu [\#779](https://github.com/ouch-org/ouch/pull/779) ([talis-fb](https://github.com/talis-fb)) +- Store symlinks by default and add `--follow-symlinks` to store the target files [\#789](https://github.com/ouch-org/ouch/pull/789) ([tommady](https://github.com/tommady)) ### Bug Fixes diff --git a/src/archive/tar.rs b/src/archive/tar.rs index 3416078..c0d962d 100644 --- a/src/archive/tar.rs +++ b/src/archive/tar.rs @@ -31,7 +31,24 @@ pub fn unpack_archive(reader: Box, output_folder: &Path, quiet: bool) for file in archive.entries()? { let mut file = file?; - file.unpack_in(output_folder)?; + match file.header().entry_type() { + tar::EntryType::Symlink => { + let relative_path = file.path()?.to_path_buf(); + let full_path = output_folder.join(&relative_path); + let target = file + .link_name()? + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "Missing symlink target"))?; + + #[cfg(unix)] + std::os::unix::fs::symlink(&target, &full_path)?; + #[cfg(windows)] + std::os::windows::fs::symlink_file(&target, &full_path)?; + } + tar::EntryType::Regular | tar::EntryType::Directory => { + file.unpack_in(output_folder)?; + } + _ => continue, + } // This is printed for every file in the archive and has little // importance for most users, but would generate lots of @@ -87,6 +104,7 @@ pub fn build_archive_from_paths( writer: W, file_visibility_policy: FileVisibilityPolicy, quiet: bool, + follow_symlinks: bool, ) -> crate::Result where W: Write, @@ -127,6 +145,18 @@ where if path.is_dir() { builder.append_dir(path, path)?; + } else if path.is_symlink() && !follow_symlinks { + let target_path = path.read_link()?; + + let mut header = tar::Header::new_gnu(); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + + builder.append_link(&mut header, path, &target_path).map_err(|err| { + FinalError::with_title("Could not create archive") + .detail("Unexpected error while trying to read link") + .detail(format!("Error: {err}.")) + })?; } else { let mut file = match fs::File::open(path) { Ok(f) => f, diff --git a/src/archive/zip.rs b/src/archive/zip.rs index f525d3b..d593477 100644 --- a/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -85,8 +85,23 @@ where )); } - let mut output_file = fs::File::create(file_path)?; - io::copy(&mut file, &mut output_file)?; + let mode = file.unix_mode().ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "Cannot extract file's mode") + })?; + let is_symlink = (mode & 0o170000) == 0o120000; + + if is_symlink { + let mut target = String::new(); + file.read_to_string(&mut target)?; + + #[cfg(unix)] + std::os::unix::fs::symlink(&target, file_path)?; + #[cfg(windows)] + std::os::windows::fs::symlink_file(&target, file_path)?; + } else { + let mut output_file = fs::File::create(file_path)?; + io::copy(&mut file, &mut output_file)?; + } set_last_modified_time(&file, file_path)?; } @@ -155,6 +170,7 @@ pub fn build_archive_from_paths( writer: W, file_visibility_policy: FileVisibilityPolicy, quiet: bool, + follow_symlinks: bool, ) -> crate::Result where W: Write + Seek, @@ -223,7 +239,7 @@ where }; #[cfg(unix)] - let options = options.unix_permissions(metadata.permissions().mode()); + let mode = metadata.permissions().mode(); let entry_name = path.to_str().ok_or_else(|| { FinalError::with_title("Zip requires that all directories names are valid UTF-8") @@ -232,6 +248,21 @@ where if metadata.is_dir() { writer.add_directory(entry_name, options)?; + } else if path.is_symlink() && !follow_symlinks { + let target_path = path.read_link()?; + let target_name = target_path.to_str().ok_or_else(|| { + FinalError::with_title("Zip requires that all directories names are valid UTF-8") + .detail(format!("File at '{target_path:?}' has a non-UTF-8 name")) + })?; + + // This approach writes the symlink target path as the content of the symlink entry. + // We detect symlinks during extraction by checking for the Unix symlink mode (0o120000) in the entry's permissions. + #[cfg(unix)] + let symlink_options = options.unix_permissions(0o120000 | (mode & 0o777)); + #[cfg(windows)] + let symlink_options = options.unix_permissions(0o120777); + + writer.add_symlink(entry_name, target_name, symlink_options)?; } else { #[cfg(not(unix))] let options = if is_executable::is_executable(path) { @@ -242,6 +273,8 @@ where let mut file = fs::File::open(path)?; + #[cfg(unix)] + let options = options.unix_permissions(mode); // Updated last modified time let last_modified_time = options.last_modified_time(get_last_modified_time(&file)); diff --git a/src/cli/args.rs b/src/cli/args.rs index 5701f4e..c72d5d3 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -81,6 +81,10 @@ pub enum Subcommand { /// conflicts with --level and --fast #[arg(long, group = "compression-level")] slow: bool, + + /// Archive target files instead of storing symlinks (supported by `tar` and `zip`) + #[arg(long, short = 'S')] + follow_symlinks: bool, }, /// Decompresses one or more files, optionally into another folder #[command(visible_alias = "d")] @@ -201,6 +205,7 @@ mod tests { level: None, fast: false, slow: false, + follow_symlinks: false, }, ..mock_cli_args() } @@ -214,6 +219,7 @@ mod tests { level: None, fast: false, slow: false, + follow_symlinks: false, }, ..mock_cli_args() } @@ -227,6 +233,7 @@ mod tests { level: None, fast: false, slow: false, + follow_symlinks: false, }, ..mock_cli_args() } @@ -251,6 +258,7 @@ mod tests { level: None, fast: false, slow: false, + follow_symlinks: false, }, format: Some("tar.gz".into()), ..mock_cli_args() diff --git a/src/cli/mod.rs b/src/cli/mod.rs index ce6b5d5..185fae6 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -54,7 +54,7 @@ fn canonicalize_files(files: &[impl AsRef]) -> io::Result> { files .iter() .map(|f| { - if is_path_stdin(f.as_ref()) { + if is_path_stdin(f.as_ref()) || f.as_ref().is_symlink() { Ok(f.as_ref().to_path_buf()) } else { fs::canonicalize(f) diff --git a/src/commands/compress.rs b/src/commands/compress.rs index 59987b8..b6f7042 100644 --- a/src/commands/compress.rs +++ b/src/commands/compress.rs @@ -31,6 +31,7 @@ pub fn compress_files( output_file: fs::File, output_path: &Path, quiet: bool, + follow_symlinks: bool, question_policy: QuestionPolicy, file_visibility_policy: FileVisibilityPolicy, level: Option, @@ -108,7 +109,14 @@ pub fn compress_files( io::copy(&mut reader, &mut writer)?; } Tar => { - archive::tar::build_archive_from_paths(&files, output_path, &mut writer, file_visibility_policy, quiet)?; + archive::tar::build_archive_from_paths( + &files, + output_path, + &mut writer, + file_visibility_policy, + quiet, + follow_symlinks, + )?; writer.flush()?; } Zip => { @@ -131,6 +139,7 @@ pub fn compress_files( &mut vec_buffer, file_visibility_policy, quiet, + follow_symlinks, )?; vec_buffer.rewind()?; io::copy(&mut vec_buffer, &mut writer)?; diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 60a2866..3e8718b 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -67,6 +67,7 @@ pub fn run( level, fast, slow, + follow_symlinks, } => { // After cleaning, if there are no input files left, exit if files.is_empty() { @@ -109,6 +110,7 @@ pub fn run( output_file, &output_path, args.quiet, + follow_symlinks, question_policy, file_visibility_policy, level, diff --git a/tests/integration.rs b/tests/integration.rs index 7a5e549..1a984a5 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1,7 +1,11 @@ #[macro_use] mod utils; -use std::{io::Write, iter::once, path::PathBuf}; +use std::{ + io::Write, + iter::once, + path::{Path, PathBuf}, +}; use fs_err as fs; use parse_display::Display; @@ -467,3 +471,92 @@ fn unpack_rar_stdin() -> Result<(), Box> { Ok(()) } + +#[proptest(cases = 25)] +fn symlink_pack_and_unpack( + ext: DirectoryExtension, + #[any(size_range(0..1).lift())] extra_extensions: Vec, +) { + if matches!(ext, DirectoryExtension::SevenZ) { + // Skip 7z because the 7z format does not support symlinks + return Ok(()); + } + + let temp_dir = tempdir()?; + let root_path = temp_dir.path(); + + let src_files_path = root_path.join("src_files"); + fs::create_dir_all(&src_files_path)?; + + let mut files_path = ["file1.txt", "file2.txt", "file3.txt", "file4.txt", "file5.txt"] + .into_iter() + .map(|f| src_files_path.join(f)) + .map(|path| { + let mut file = fs::File::create(&path).unwrap(); + file.write_all("Some content".as_bytes()).unwrap(); + path + }) + .collect::>(); + + let dest_files_path = root_path.join("dest_files"); + fs::create_dir_all(&dest_files_path)?; + + let symlink_path = src_files_path.join(Path::new("symlink")); + #[cfg(unix)] + std::os::unix::fs::symlink(&files_path[0], &symlink_path)?; + #[cfg(windows)] + std::os::windows::fs::symlink_file(&files_path[0], &symlink_path)?; + + files_path.push(symlink_path); + + let archive = &root_path.join(format!("archive.{}", merge_extensions(&ext, extra_extensions))); + + crate::utils::cargo_bin() + .arg("compress") + .args(files_path.clone()) + .arg(archive) + .assert() + .success(); + + crate::utils::cargo_bin() + .arg("decompress") + .arg(archive) + .arg("-d") + .arg(&dest_files_path) + .assert() + .success(); + + assert_same_directory(&src_files_path, &dest_files_path, false); + // check the symlink stand still + for f in dest_files_path.as_path().read_dir()? { + let f = f?; + if f.file_name() == "symlink" { + assert!(f.file_type()?.is_symlink()) + } + } + + fs::remove_file(archive)?; + fs::remove_dir_all(&dest_files_path)?; + + crate::utils::cargo_bin() + .arg("compress") + .arg("--follow-symlinks") + .args(files_path) + .arg(archive) + .assert() + .success(); + + crate::utils::cargo_bin() + .arg("decompress") + .arg(archive) + .arg("-d") + .arg(&dest_files_path) + .assert() + .success(); + + // check there is no symlinks + for f in dest_files_path.as_path().read_dir()? { + let f = f?; + assert!(!f.file_type().unwrap().is_symlink()) + } +} diff --git a/tests/utils.rs b/tests/utils.rs index 3648cef..4d2a84f 100644 --- a/tests/utils.rs +++ b/tests/utils.rs @@ -88,7 +88,7 @@ pub fn assert_same_directory(x: impl Into, y: impl Into, prese if ft_x.is_dir() && ft_y.is_dir() { assert_same_directory(x.path(), y.path(), preserve_permissions); - } else if ft_x.is_file() && ft_y.is_file() { + } else if (ft_x.is_file() && ft_y.is_file()) || (ft_x.is_symlink() && ft_y.is_symlink()) { assert_eq!(meta_x.len(), meta_y.len()); assert_eq!(fs::read(x.path()).unwrap(), fs::read(y.path()).unwrap()); } else {