Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-81340: Use copy_file_range in shutil.copyfile copy functions #93152

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Doc/library/shutil.rst
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``".

On macOS `fcopyfile`_ is used to copy the file content (not metadata).

On Linux :func:`os.sendfile` is used.
On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used.

On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
instead of 64 KiB) and a :func:`memoryview`-based variant of
Expand All @@ -529,6 +529,10 @@ file then shutil will silently fallback on using less efficient

.. versionchanged:: 3.8

.. versionchanged:: 3.14
Copy-on-write or server-side copy may be used internally via
:func:`os.copy_file_range` on supported Linux filesystems.

.. _shutil-copytree-example:

copytree example
Expand Down
96 changes: 76 additions & 20 deletions Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
# https://bugs.python.org/issue43743#msg393429
_USE_CP_SENDFILE = (hasattr(os, "sendfile")
and sys.platform.startswith(("linux", "android")))
_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS

# CMD defaults in Windows 10
Expand Down Expand Up @@ -109,6 +110,66 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags):
else:
raise err from None

def _determine_linux_fastcopy_blocksize(infd):
"""Determine blocksize for fastcopying on Linux.

Hopefully the whole file will be copied in a single call.
The copying itself should be performed in a loop 'till EOF is
reached (0 return) so a blocksize smaller or bigger than the actual
file size should not make any difference, also in case the file
content changes while being copied.
"""
try:
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB
except OSError:
blocksize = 2 ** 27 # 128 MiB
# On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
# see gh-82500.
if sys.maxsize < 2 ** 32:
blocksize = min(blocksize, 2 ** 30)
return blocksize

def _fastcopy_copy_file_range(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    the high-performance copy_file_range(2) syscall, which gives
    filesystems an opportunity to implement the use of reflinks or
    server-side copy.

    This should work on Linux >= 4.5 only.

    Raises _GiveupOnFastCopy when either object has no usable fileno(),
    when the very first call fails (for a reason other than ENOSPC)
    before anything was written, or when the very first call reports
    0 bytes copied.  Any other OSError is re-raised with its filename
    and filename2 attributes set to the names of *fsrc* and *fdst*.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    chunk = _determine_linux_fastcopy_blocksize(src_fd)
    # Running total of bytes copied; also passed as the explicit write
    # offset in the destination.
    dst_pos = 0
    while True:
        try:
            copied = os.copy_file_range(src_fd, dst_fd, chunk,
                                        offset_dst=dst_pos)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            nothing_written = (dst_pos == 0
                               and os.lseek(dst_fd, 0, os.SEEK_CUR) == 0)
            if nothing_written:
                raise _GiveupOnFastCopy(err)

            raise err

        if copied == 0:
            # If no bytes have been copied yet, copy_file_range
            # might silently fail.
            # https://lore.kernel.org/linux-fsdevel/[email protected]/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
            if dst_pos == 0:
                raise _GiveupOnFastCopy()
            return  # EOF reached after at least one successful call
        dst_pos += copied

def _fastcopy_sendfile(fsrc, fdst):
"""Copy data from one regular mmap-like fd to another by using
high-performance sendfile(2) syscall.
Expand All @@ -130,20 +191,7 @@ def _fastcopy_sendfile(fsrc, fdst):
except Exception as err:
raise _GiveupOnFastCopy(err) # not a regular file

# Hopefully the whole file will be copied in a single call.
# sendfile() is called in a loop 'till EOF is reached (0 return)
# so a bufsize smaller or bigger than the actual file size
# should not make any difference, also in case the file content
# changes while being copied.
try:
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB
except OSError:
blocksize = 2 ** 27 # 128MiB
# On 32-bit architectures truncate to 1GiB to avoid OverflowError,
# see bpo-38319.
if sys.maxsize < 2 ** 32:
blocksize = min(blocksize, 2 ** 30)

blocksize = _determine_linux_fastcopy_blocksize(infd)
offset = 0
while True:
try:
Expand Down Expand Up @@ -268,12 +316,20 @@ def copyfile(src, dst, *, follow_symlinks=True):
except _GiveupOnFastCopy:
pass
# Linux
elif _USE_CP_SENDFILE:
try:
_fastcopy_sendfile(fsrc, fdst)
return dst
except _GiveupOnFastCopy:
pass
elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE:
# reflink may be implicit in copy_file_range.
if _USE_CP_COPY_FILE_RANGE:
try:
_fastcopy_copy_file_range(fsrc, fdst)
return dst
except _GiveupOnFastCopy:
pass
if _USE_CP_SENDFILE:
try:
_fastcopy_sendfile(fsrc, fdst)
return dst
except _GiveupOnFastCopy:
pass
# Windows, see:
# https://github.com/python/cpython/pull/7160#discussion_r195405230
elif _WINDOWS and file_size > 0:
Expand Down
71 changes: 50 additions & 21 deletions Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3184,12 +3184,8 @@ def test_filesystem_full(self):
self.assertRaises(OSError, self.zerocopy_fun, src, dst)


@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
PATCHPOINT = "os.sendfile"

def zerocopy_fun(self, fsrc, fdst):
return shutil._fastcopy_sendfile(fsrc, fdst)
class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest):
BLOCKSIZE_INDEX = None

def test_non_regular_file_src(self):
with io.BytesIO(self.FILEDATA) as src:
Expand All @@ -3210,65 +3206,65 @@ def test_non_regular_file_dst(self):
self.assertEqual(dst.read(), self.FILEDATA)

def test_exception_on_second_call(self):
def sendfile(*args, **kwargs):
def syscall(*args, **kwargs):
if not flag:
flag.append(None)
return orig_sendfile(*args, **kwargs)
return orig_syscall(*args, **kwargs)
else:
raise OSError(errno.EBADF, "yo")

flag = []
orig_sendfile = os.sendfile
with unittest.mock.patch('os.sendfile', create=True,
side_effect=sendfile):
orig_syscall = eval(self.PATCHPOINT)
with unittest.mock.patch(self.PATCHPOINT, create=True,
side_effect=syscall):
with self.get_files() as (src, dst):
with self.assertRaises(OSError) as cm:
shutil._fastcopy_sendfile(src, dst)
self.zerocopy_fun(src, dst)
assert flag
self.assertEqual(cm.exception.errno, errno.EBADF)

def test_cant_get_size(self):
# Emulate a case where src file size cannot be determined.
# Internally bufsize will be set to a small value and
# sendfile() will be called repeatedly.
# a system call will be called repeatedly.
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
with self.get_files() as (src, dst):
shutil._fastcopy_sendfile(src, dst)
self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

def test_small_chunks(self):
# Force internal file size detection to be smaller than the
# actual file size. We want to force sendfile() to be called
# actual file size. We want to force a system call to be called
# multiple times, also in order to emulate a src fd which gets
# bigger while it is being copied.
mock = unittest.mock.Mock()
mock.st_size = 65536 + 1
with unittest.mock.patch('os.fstat', return_value=mock) as m:
with self.get_files() as (src, dst):
shutil._fastcopy_sendfile(src, dst)
self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

def test_big_chunk(self):
# Force internal file size detection to be +100MB bigger than
# the actual file size. Make sure sendfile() does not rely on
# the actual file size. Make sure a system call does not rely on
# file size value except for (maybe) a better throughput /
# performance.
mock = unittest.mock.Mock()
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
with unittest.mock.patch('os.fstat', return_value=mock) as m:
with self.get_files() as (src, dst):
shutil._fastcopy_sendfile(src, dst)
self.zerocopy_fun(src, dst)
assert m.called
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

def test_blocksize_arg(self):
with unittest.mock.patch('os.sendfile',
with unittest.mock.patch(self.PATCHPOINT,
side_effect=ZeroDivisionError) as m:
self.assertRaises(ZeroDivisionError,
shutil.copyfile, TESTFN, TESTFN2)
blocksize = m.call_args[0][3]
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
# Make sure file size and the block size arg passed to
# the patched system call are the same.
self.assertEqual(blocksize, os.path.getsize(TESTFN))
Expand All @@ -3278,9 +3274,19 @@ def test_blocksize_arg(self):
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
self.assertRaises(ZeroDivisionError,
shutil.copyfile, TESTFN2, TESTFN2 + '3')
blocksize = m.call_args[0][3]
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
self.assertEqual(blocksize, 2 ** 23)


@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False)
class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase):
PATCHPOINT = "os.sendfile"
BLOCKSIZE_INDEX = 3

def zerocopy_fun(self, fsrc, fdst):
return shutil._fastcopy_sendfile(fsrc, fdst)

def test_file2file_not_supported(self):
# Emulate a case where sendfile() only support file->socket
# fds. In such a case copyfile() is supposed to skip the
Expand All @@ -3303,6 +3309,29 @@ def test_file2file_not_supported(self):
shutil._USE_CP_SENDFILE = True


@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported")
class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
    # Runs the shared Linux zero-copy tests against the
    # copy_file_range()-based implementation.
    PATCHPOINT = "os.copy_file_range"
    # Positional index of the byte-count argument in the patched call.
    BLOCKSIZE_INDEX = 2

    def zerocopy_fun(self, fsrc, fdst):
        return shutil._fastcopy_copy_file_range(fsrc, fdst)

    def test_empty_file(self):
        src_path = f"{TESTFN}src"
        dst_path = f"{TESTFN}dst"
        for path in (src_path, dst_path):
            self.addCleanup(os_helper.unlink, path)

        # Create an empty source file.
        open(src_path, "wb").close()

        with open(src_path, "rb") as src:
            with open(dst_path, "wb") as dst:
                # _fastcopy_copy_file_range gives up copying empty files
                # due to a bug in older Linux.
                with self.assertRaises(shutil._GiveupOnFastCopy):
                    self.zerocopy_fun(src, dst)


@unittest.skipIf(not MACOS, 'macOS only')
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
PATCHPOINT = "posix._fcopyfile"
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1946,6 +1946,7 @@ Johannes Vogel
Michael Vogt
Radu Voicilas
Alex Volkov
Illia Volochii
Ruben Vorderman
Guido Vranken
Martijn Vries
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`,
and :func:`shutil.copyfile` functions by default. An underlying Linux system
call gives filesystems an opportunity to implement the use of copy-on-write
(in the case of btrfs and XFS) or server-side copy (in the case of NFS).
Patch by Illia Volochii.
Loading