Skip to content

Commit

Permalink
cp: Implement --sparse flag
Browse files Browse the repository at this point in the history
This begins to address #3362

At the moment, only the `--sparse=always` logic matches the requirement
from the GNU cp info page, i.e. always make holes in the destination when
possible.

Sparse copy is done by copying the source to the destination block by
block (blocks being of the destination's fs block size). If the block
only holds NUL bytes, we don't write to the destination.

About `--sparse=auto`: according to the GNU cp info page, the destination
file will be made sparse if the source file is sparse as well. The next
step is likely to use `lseek` with `SEEK_HOLE` to detect whether the source
file has holes. Currently, this has the same behaviour as
`--sparse=never`. This `SEEK_HOLE` logic can also be applied to
`--sparse=always` to improve performance when copying sparse files.

About `--sparse=never`: from my understanding, it is not guaranteed that
Rust's `fs::copy` will always produce a file with no holes, as
["platform-specific behavior may change in the
future"](https://doc.rust-lang.org/std/fs/fn.copy.html#platform-specific-behavior)

About other platforms:
 - `macos`: The solution may be to use `fcntl` command `F_PUNCHHOLE`.
 - `windows`: I only see `FSCTL_SET_SPARSE`.

This should pass the following GNU tests:
 - `tests/cp/sparse.sh`
 - `tests/cp/sparse-2.sh`
 - `tests/cp/sparse-extents.sh`
 - `tests/cp/sparse-extents-2.sh`

`sparse-perf.sh` needs `--sparse=auto`, and in particular a way to skip
holes in the source file.
  • Loading branch information
pimzero committed Aug 2, 2022
1 parent 8cd0542 commit 7fd289f
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 12 deletions.
72 changes: 60 additions & 12 deletions src/uu/cp/src/cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// For the full copyright and license information, please view the LICENSE file
// that was distributed with this source code.

// spell-checker:ignore (ToDO) ficlone linkgs lstat nlink nlinks pathbuf reflink strs xattrs symlinked
// spell-checker:ignore (ToDO) ficlone ftruncate linkgs lstat nlink nlinks pathbuf reflink strs xattrs symlinked

#[macro_use]
extern crate quick_error;
Expand Down Expand Up @@ -1563,16 +1563,18 @@ fn copy_no_cow_fallback(
sparse_mode: SparseMode,
context: &str,
) -> CopyResult<()> {
if (reflink_mode != ReflinkMode::Never) {
if reflink_mode != ReflinkMode::Never {
return Err("--reflink is only supported on linux and macOS"
.to_string()
.into());
}
if (sparse_mode != SparseMode::Auto) {
if sparse_mode != SparseMode::Auto {
return Err("--sparse is only supported on linux".to_string().into());
}

fs::copy(source, dest).context(context)
fs::copy(source, dest).context(context)?;

Ok(())
}

/// Copies `source` to `dest` using copy-on-write if possible.
Expand All @@ -1581,18 +1583,21 @@ fn copy_on_write_linux(
source: &Path,
dest: &Path,
reflink_mode: ReflinkMode,
_sparse_mode: SparseMode,
sparse_mode: SparseMode,
context: &str,
) -> CopyResult<()> {
let src_file = File::open(source).context(context)?;
use std::os::unix::prelude::MetadataExt;

let mut src_file = File::open(source).context(context)?;
let dst_file = OpenOptions::new()
.write(true)
.truncate(false)
.truncate(true)
.create(true)
.open(dest)
.context(context)?;
match reflink_mode {
ReflinkMode::Always => unsafe {

match (reflink_mode, sparse_mode) {
(ReflinkMode::Always, SparseMode::Auto) => unsafe {
let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());

if result != 0 {
Expand All @@ -1607,15 +1612,58 @@ fn copy_on_write_linux(
Ok(())
}
},
ReflinkMode::Auto => unsafe {
(ReflinkMode::Always, SparseMode::Always) | (ReflinkMode::Always, SparseMode::Never) => {
Err("`--reflink=always` can be used only with --sparse=auto".into())
}
(_, SparseMode::Always) => {
    use std::io::Read;

    // Sparse copy: give the destination the source's length first, then
    // walk the source one destination-block at a time and only copy the
    // blocks that contain at least one non-NUL byte. Blocks that are
    // skipped are never written to the destination.
    let size: usize = src_file.metadata()?.size().try_into().unwrap();

    // SAFETY: `dst_file` is an open `File` that outlives the call, so the
    // descriptor passed to `ftruncate` is valid.
    let truncated =
        unsafe { libc::ftruncate(dst_file.as_raw_fd(), i64::try_from(size).unwrap()) };
    if truncated < 0 {
        return Err(format!(
            "failed to ftruncate {:?} to size {}: {}",
            dest,
            size,
            std::io::Error::last_os_error()
        )
        .into());
    }

    let blksize = dst_file.metadata()?.blksize();
    let mut buf: Vec<u8> = vec![0; blksize.try_into().unwrap()];
    let mut current_offset: usize = 0;

    while current_offset < size {
        let this_read = src_file.read(&mut buf)?;
        if this_read == 0 {
            // Nothing more can be read (the source shrank after its size
            // was sampled, or the reported block size was 0). Stop here
            // instead of spinning forever on a zero-length read.
            break;
        }

        // Only the bytes filled by this read are meaningful; the tail of
        // `buf` may still hold data from a previous, longer read.
        if buf[..this_read].iter().any(|&x| x != 0) {
            const COPY_FLAGS: u32 = 0;
            let mut off_src: i64 = current_offset.try_into().unwrap();
            let mut off_dst: i64 = off_src;
            let mut remaining = this_read;

            // `copy_file_range` may copy fewer bytes than requested; it
            // advances both offsets by the amount copied, so keep calling
            // it until the whole chunk has been transferred.
            while remaining > 0 {
                // SAFETY: both descriptors belong to files that are open
                // for the duration of the call, and the offset pointers
                // refer to live local variables.
                let copied = unsafe {
                    libc::copy_file_range(
                        src_file.as_raw_fd(),
                        &mut off_src,
                        dst_file.as_raw_fd(),
                        &mut off_dst,
                        remaining,
                        COPY_FLAGS,
                    )
                };
                if copied < 0 {
                    return Err(format!(
                        "failed to copy data from {:?} to {:?}: {}",
                        source,
                        dest,
                        std::io::Error::last_os_error()
                    )
                    .into());
                }
                if copied == 0 {
                    // The source has no more data at this offset.
                    break;
                }
                remaining = remaining.saturating_sub(copied as usize);
            }
        }

        current_offset += this_read;
    }
    Ok(())
},
(ReflinkMode::Auto, SparseMode::Auto) | (ReflinkMode::Auto, SparseMode::Never) => unsafe {
let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());

if result != 0 {
fs::copy(source, dest).context(context)?;
}
Ok(())
},
ReflinkMode::Never => {
(ReflinkMode::Never, _) => {
fs::copy(source, dest).context(context)?;
Ok(())
}
Expand All @@ -1631,7 +1679,7 @@ fn copy_on_write_macos(
sparse_mode: SparseMode,
context: &str,
) -> CopyResult<()> {
if (sparse_mode != SparseMode::Auto) {
if sparse_mode != SparseMode::Auto {
return Err("--sparse is only supported on linux".to_string().into());
}

Expand Down
111 changes: 111 additions & 0 deletions tests/by-util/test_cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,117 @@ fn test_closes_file_descriptors() {
.succeeds();
}

/// Copies an all-zero file with `--sparse=never` and expects the destination
/// to hold the same bytes and to have as many bytes allocated (`blocks()`
/// counts 512-byte units) as the source buffer is long.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_never_empty() {
    const BUFFER_SIZE: usize = 4096 * 4;

    let (at, mut ucmd) = at_and_ucmd!();
    let zeros = [0u8; BUFFER_SIZE];

    at.make_file("src_file1");
    at.write_bytes("src_file1", &zeros);

    ucmd.args(&["--sparse=never", "src_file1", "dst_file_non_sparse"])
        .succeeds();

    // Content check first, then the on-disk allocation.
    assert_eq!(at.read_bytes("dst_file_non_sparse"), zeros);

    let allocated_bytes = at.metadata("dst_file_non_sparse").blocks() * 512;
    assert_eq!(allocated_bytes, zeros.len() as u64);
}

/// Copies an all-zero file with `--sparse=always` and expects the destination
/// to read back as the same bytes while having no blocks allocated at all.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_empty() {
    const BUFFER_SIZE: usize = 4096 * 4;

    let (at, mut ucmd) = at_and_ucmd!();
    let zeros = [0u8; BUFFER_SIZE];

    at.make_file("src_file1");
    at.write_bytes("src_file1", &zeros);

    ucmd.args(&["--sparse=always", "src_file1", "dst_file_sparse"])
        .succeeds();

    assert_eq!(at.read_bytes("dst_file_sparse"), zeros);

    let allocated_blocks = at.metadata("dst_file_sparse").blocks();
    assert_eq!(allocated_blocks, 0);
}

/// Copies a mostly-zero file that has two non-zero bytes with
/// `--sparse=always`. The destination must read back identically, and its
/// allocated 512-byte block count must equal two destination blocks' worth
/// (`2 * blksize / 512`), i.e. one block per touched byte.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_non_empty() {
    const BUFFER_SIZE: usize = 4096 * 16 + 3;

    let (at, mut ucmd) = at_and_ucmd!();

    // Byte offsets (one third and two thirds into the buffer) that get a
    // non-zero value.
    let touched_offsets = [BUFFER_SIZE / 3, 2 * (BUFFER_SIZE / 3)];
    let mut data = [0u8; BUFFER_SIZE];
    for &offset in &touched_offsets {
        data[offset] = b'x';
    }

    at.make_file("src_file1");
    at.write_bytes("src_file1", &data);

    ucmd.args(&["--sparse=always", "src_file1", "dst_file_sparse"])
        .succeeds();

    let dst_block_size = at.metadata("dst_file_sparse").blksize();
    let expected_blocks = touched_offsets.len() as u64 * dst_block_size / 512;

    assert_eq!(at.read_bytes("dst_file_sparse"), data);
    assert_eq!(at.metadata("dst_file_sparse").blocks(), expected_blocks);
}

/// `--sparse=invalid` is not an accepted value, so the command must fail.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_invalid_option() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file("src_file1");

    let cli_args = ["--sparse=invalid", "src_file1", "dst_file"];
    ucmd.args(&cli_args).fails();
}

/// Combining `--sparse=always` with `--reflink=always` must be rejected.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_always_reflink_always() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file("src_file1");

    let cli_args = [
        "--sparse=always",
        "--reflink=always",
        "src_file1",
        "dst_file",
    ];
    ucmd.args(&cli_args).fails();
}

/// Combining `--sparse=never` with `--reflink=always` must be rejected.
#[cfg(any(target_os = "linux", target_os = "android"))]
#[test]
fn test_cp_sparse_never_reflink_always() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.make_file("src_file1");

    let cli_args = [
        "--sparse=never",
        "--reflink=always",
        "src_file1",
        "dst_file",
    ];
    ucmd.args(&cli_args).fails();
}

#[test]
fn test_copy_dir_symlink() {
let (at, mut ucmd) = at_and_ucmd!();
Expand Down

0 comments on commit 7fd289f

Please sign in to comment.