From 7128a7ac445166fabb9b68b38dcc6af15e6c1113 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 14 May 2024 22:34:30 +0100 Subject: [PATCH 01/16] GH-73991: Add `pathlib.Path.copy()` method. Add a `Path.copy()` method that copies a file to a target file or directory using `shutil.copy2()`. In the private pathlib ABCs, we add a version that supports copying from one instance of `PathBase` to another. We don't copy metadata, because doing so probably requires new APIs that haven't been designed yet. --- Doc/library/pathlib.rst | 17 ++++++ Doc/whatsnew/3.14.rst | 7 +++ Lib/pathlib/_abc.py | 35 ++++++++++++ Lib/pathlib/_local.py | 17 ++++++ Lib/test/test_pathlib/test_pathlib.py | 67 +++++++++++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 41 ++++++++++++++ 6 files changed, 184 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 27ed0a32e801cc..eacdaecc333836 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1398,6 +1398,23 @@ call fails (for example because the path doesn't exist). available. In previous versions, :exc:`NotImplementedError` was raised. +.. method:: Path.copy(target, *, follow_symlinks=True) + + Copy this file and its metadata to the file or directory *target*. If + *target* specifies a directory, the file will be copied into *target* using + this file's :attr:`~PurePath.name`. If *target* specifies a file that + already exists, it will be replaced. Returns the path to the newly created + file. + + If *follow_symlinks* is false, and this file is a symbolic link, *target* + will be created as a symbolic link. If *follow_symlinks* is true and this + file is a symbolic link, *target* will be a copy of the symlink target. + + This method calls :func:`shutil.copy2` internally. + + .. versionadded:: 3.14 + + .. method:: Path.rename(target) Rename this file or directory to the given *target*, and return a new Path diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 33a0f3e0f2f4bc..ed29cd4f4d7a1d 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -86,6 +86,13 @@ New Modules Improved Modules ================ +pathlib +------- + +* Add :meth:`pathlib.Path.copy`, which copies a file to a target file or + directory, like :func:`shutil.copy2`. + (Contributed by Barney Gale in :gh:`73991`.) + Optimizations ============= diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 568a17df26fc33..38a6d5e3c2dda4 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -14,6 +14,7 @@ import functools from glob import _Globber, _no_recurse_symlinks from errno import ENOTDIR, ELOOP +from shutil import copyfileobj from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -539,6 +540,14 @@ def samefile(self, other_path): return (st.st_ino == other_st.st_ino and st.st_dev == other_st.st_dev) + def _samefile_safe(self, other_path): + """Like samefile(), but returns False rather than raising OSError. + """ + try: + return self.samefile(other_path) + except (OSError, ValueError): + return False + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ @@ -757,6 +766,32 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ raise UnsupportedOperation(self._unsupported_msg('mkdir()')) + def copy(self, target, follow_symlinks=True): + """ + Copy this file and its metadata to the given target. Returns the path + of the new file. + + If this file is a symlink and *follow_symlinks* is true (the default), + the symlink's target is copied. Otherwise, the symlink is recreated at + the target. + """ + if not isinstance(target, PathBase): + target = self.with_segments(target) + if target.is_dir(): + target /= self.name + if self._samefile_safe(target): + raise OSError(f"{self!r} and {target!r} are the same file") + if not follow_symlinks and self.is_symlink(): + target.symlink_to(self.readlink()) + return target + + with self.open('rb') as f_source: + with target.open('wb') as f_target: + copyfileobj(f_source, f_target) + + # FIXME: how do we copy metadata between PathBase instances? + return target + def rename(self, target): """ Rename this path to the target path. diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 7dc071949b9bd7..0f2c1676042ca1 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -3,6 +3,7 @@ import operator import os import posixpath +import shutil import sys import warnings from glob import _StringGlobber @@ -751,6 +752,22 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not exist_ok or not self.is_dir(): raise + def copy(self, target, follow_symlinks=True): + """ + Copy this file and its metadata to the given target. Returns the path + of the new file. + + If this file is a symlink and *follow_symlinks* is true (the default), + the symlink's target is copied. Otherwise, the symlink is recreated at + the target. + """ + if not isinstance(target, PathBase): + target = self.with_segments(target) + if isinstance(target, Path): + target = shutil.copy2(self, target, follow_symlinks=follow_symlinks) + return self.with_segments(target) + return PathBase.copy(self, target, follow_symlinks=follow_symlinks) + def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 4fd2aac4a62139..2ce134936a5689 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -810,6 +810,73 @@ def test_hardlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.hardlink_to(p) + @unittest.skipUnless(hasattr(os, 'utime'), 'requires os.utime') + def test_copy(self): + base = self.cls(self.base) + source = base / 'fileA' + source_stat = source.stat() + target = source.copy(base / 'copyA') + target_stat = target.stat() + self.assertTrue(target.exists()) + self.assertEqual(source_stat.st_mode, target_stat.st_mode) + self.assertEqual(source.read_text(), target.read_text()) + for attr in 'st_atime', 'st_mtime': + # The modification times may be truncated in the new file. + self.assertLessEqual(getattr(source_stat, attr), + getattr(target_stat, attr) + 1) + if hasattr(os, 'chflags') and hasattr(source_stat, 'st_flags'): + self.assertEqual(getattr(source_stat, 'st_flags'), + getattr(target_stat, 'st_flags')) + + @needs_symlinks + def test_copy_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + source_stat = source.stat() + source_lstat = source.lstat() + if hasattr(os, 'lchmod'): + os.lchmod(source, stat.S_IRWXU | stat.S_IRWXO) + if hasattr(os, 'lchflags') and hasattr(stat, 'UF_NODUMP'): + os.lchflags(source, stat.UF_NODUMP) + target = source.copy(base / 'copyA', follow_symlinks=False) + target_lstat = target.lstat() + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + if os.utime in os.supports_follow_symlinks: + for attr in 'st_atime', 'st_mtime': + # The modification times may be truncated in the new file. + self.assertLessEqual(getattr(source_lstat, attr), + getattr(target_lstat, attr) + 1) + if hasattr(os, 'lchmod'): + self.assertEqual(source_lstat.st_mode, target_lstat.st_mode) + self.assertNotEqual(source_stat.st_mode, target_lstat.st_mode) + if hasattr(os, 'lchflags') and hasattr(source_lstat, 'st_flags'): + self.assertEqual(source_lstat.st_flags, target_lstat.st_flags) + + @os_helper.skip_unless_xattr + def test_copy_xattr(self): + base = self.cls(self.base) + source = base / 'fileA' + os.setxattr(source, 'user.foo', b'42') + target = source.copy(base / 'copyA') + self.assertEqual( + os.getxattr(source, 'user.foo'), + os.getxattr(target, 'user.foo')) + + def test_copy_dir(self): + base = self.cls(self.base) + source = base / 'dirA' + target = base / 'copyA' + if sys.platform == "win32": + err = PermissionError + else: + err = IsADirectoryError + with self.assertRaises(err): + source.copy(target) + with self.assertRaises(err): + source.copy(target / 'does_not_exist/') + def test_rename(self): P = self.cls(self.base) p = P / 'fileA' diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index aadecbc142cca6..cf02682a1d8eb1 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1678,6 +1678,47 @@ def test_write_text_with_newlines(self): self.assertEqual((p / 'fileA').read_bytes(), b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq') + def test_copy(self): + base = self.cls(self.base) + source = base / 'fileA' + target = source.copy(base / 'copyA') + self.assertTrue(target.exists()) + self.assertEqual(source.stat().st_mode, target.stat().st_mode) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_follow_symlinks_true(self): + base = self.cls(self.base) + source = base / 'linkA' + target = source.copy(base / 'copyA') + self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + target = source.copy(base / 'copyA', follow_symlinks=False) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + def test_copy_return_value(self): + base = self.cls(self.base) + source = base / 'fileA' + self.assertEqual(source.copy(base / 'dirA'), base / 'dirA' / 'fileA') + self.assertEqual(source.copy(base / 'dirA' / 'copyA'), base / 'dirA' / 'copyA') + + def test_copy_dir(self): + base = self.cls(self.base) + source = base / 'dirA' + target = base / 'copyA' + with self.assertRaises(OSError): + source.copy(target) + with self.assertRaises(OSError): + source.copy(target / 'does_not_exist/') + def test_iterdir(self): P = self.cls p = P(self.base) From 4cf63e6bde04e8d37270aadcefc3b34344060bc5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 15 May 2024 01:36:18 +0100 Subject: [PATCH 02/16] Fix tests, add news --- Lib/test/test_pathlib/test_pathlib.py | 4 ++-- .../Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2ce134936a5689..8d04edff6ab5fe 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -832,12 +832,12 @@ def test_copy(self): def test_copy_follow_symlinks_false(self): base = self.cls(self.base) source = base / 'linkA' - source_stat = source.stat() - source_lstat = source.lstat() if hasattr(os, 'lchmod'): os.lchmod(source, stat.S_IRWXU | stat.S_IRWXO) if hasattr(os, 'lchflags') and hasattr(stat, 'UF_NODUMP'): os.lchflags(source, stat.UF_NODUMP) + source_stat = source.stat() + source_lstat = source.lstat() target = source.copy(base / 'copyA', follow_symlinks=False) target_lstat = target.lstat() self.assertTrue(target.exists()) diff --git a/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst new file mode 100644 index 00000000000000..f8e3465ae0cd78 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst @@ -0,0 +1,2 @@ +Add :meth:`pathlib.Path.copy`, which copies a file to a target file or +directory, like :func:`shutil.copy2`. From e83f53e33051fcb6d07bfeb7bbf51c04af5a5ef4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 15 May 2024 17:17:06 +0100 Subject: [PATCH 03/16] Comment on use of `shutil.copy2()` --- Lib/pathlib/_local.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 0f2c1676042ca1..36175686be5193 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -764,6 +764,8 @@ def copy(self, target, follow_symlinks=True): if not isinstance(target, PathBase): target = self.with_segments(target) if isinstance(target, Path): + # The source and target are *local* paths, so use shutil.copy2() + # to efficiently copy data and metadata using available OS APIs. target = shutil.copy2(self, target, follow_symlinks=follow_symlinks) return self.with_segments(target) return PathBase.copy(self, target, follow_symlinks=follow_symlinks) From 092a0e018ffecb8baa52ea36f02ab777ab223d7f Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 16 May 2024 00:35:04 +0100 Subject: [PATCH 04/16] Use fcopyfile() / sendfile() where available. --- Lib/pathlib/_abc.py | 4 ++-- Lib/shutil.py | 45 ++++++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 38a6d5e3c2dda4..5ddf65fa75cb1c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -14,7 +14,7 @@ import functools from glob import _Globber, _no_recurse_symlinks from errno import ENOTDIR, ELOOP -from shutil import copyfileobj +from shutil import _fastcopy from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -787,7 +787,7 @@ def copy(self, target, follow_symlinks=True): with self.open('rb') as f_source: with target.open('wb') as f_target: - copyfileobj(f_source, f_target) + _fastcopy(f_source, f_target) # FIXME: how do we copy metadata between PathBase instances? return target diff --git a/Lib/shutil.py b/Lib/shutil.py index c9b4da34b1e19b..ad12dfb96eb18a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -89,6 +89,29 @@ class _GiveupOnFastCopy(Exception): file copy when fast-copy functions fail to do so. """ +def _fastcopy(fsrc, fdst, file_size=0): + # macOS + if _HAS_FCOPYFILE: + try: + _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) + return + except _GiveupOnFastCopy: + pass + # Linux + elif _USE_CP_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return + except _GiveupOnFastCopy: + pass + # Windows, see: + # https://github.com/python/cpython/pull/7160#discussion_r195405230 + elif _WINDOWS and file_size > 0: + _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) + return + + copyfileobj(fsrc, fdst) + def _fastcopy_fcopyfile(fsrc, fdst, flags): """Copy a regular file content or metadata by using high-performance fcopyfile(3) syscall (macOS). @@ -260,27 +283,7 @@ def copyfile(src, dst, *, follow_symlinks=True): with open(src, 'rb') as fsrc: try: with open(dst, 'wb') as fdst: - # macOS - if _HAS_FCOPYFILE: - try: - _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) - return dst - except _GiveupOnFastCopy: - pass - # Linux - elif _USE_CP_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass - # Windows, see: - # https://github.com/python/cpython/pull/7160#discussion_r195405230 - elif _WINDOWS and file_size > 0: - _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) - return dst - - copyfileobj(fsrc, fdst) + _fastcopy(fsrc, fdst, file_size) # Issue 43219, raise a less confusing exception except IsADirectoryError as e: From a9a216e846177a59cae14646eab29c2eb5ff9e59 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jun 2024 04:23:15 +0100 Subject: [PATCH 05/16] Drop usage of shutil --- Doc/library/pathlib.rst | 10 +- Doc/whatsnew/3.14.rst | 4 +- Lib/pathlib/_abc.py | 122 +++++++++++++++--- Lib/pathlib/_local.py | 32 +++-- Lib/shutil.py | 45 +++---- Lib/test/test_pathlib/test_pathlib.py | 67 ---------- Lib/test/test_pathlib/test_pathlib_abc.py | 54 +++++--- ...4-05-15-01-36-08.gh-issue-73991.CGknDf.rst | 4 +- 8 files changed, 186 insertions(+), 152 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4820244a1f887d..0252c923fe8458 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1479,17 +1479,15 @@ example because the path doesn't exist). .. method:: Path.copy(target, *, follow_symlinks=True) - Copy this file and its metadata to the file or directory *target*. If - *target* specifies a directory, the file will be copied into *target* using - this file's :attr:`~PurePath.name`. If *target* specifies a file that - already exists, it will be replaced. Returns the path to the newly created - file. + Copy the contents of this file to the *target* file. If *target* specifies + a file that already exists, it will be replaced. If *follow_symlinks* is false, and this file is a symbolic link, *target* will be created as a symbolic link. If *follow_symlinks* is true and this file is a symbolic link, *target* will be a copy of the symlink target. - This method calls :func:`shutil.copy2` internally. + This method doesn't guarantee whether permissions, ownership, and other + metadata is copied; it may vary by operating system and Python version. .. versionadded:: 3.14 diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 27795349141ef5..fdb3bb32a408da 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -95,8 +95,8 @@ Added :func:`ast.compare` for comparing two ASTs. pathlib ------- -* Add :meth:`pathlib.Path.copy`, which copies a file to a target file or - directory, like :func:`shutil.copy2`. +* Add :meth:`pathlib.Path.copy`, which copies the content of one file to + another, like :func:`shutil.copyfile`. (Contributed by Barney Gale in :gh:`73991`.) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 3296b2d2c70917..7fe50fe8e0dc5b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,10 +12,15 @@ """ import functools +import os +import sys from glob import _Globber, _no_recurse_symlinks from errno import ENOTDIR, ELOOP -from shutil import _fastcopy from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +try: + import posix +except ImportError: + posix = None __all__ = ["UnsupportedOperation"] @@ -26,6 +31,25 @@ def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' +def _get_copy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + + class UnsupportedOperation(NotImplementedError): """An exception that is raised when an unsupported operation is called on a path object. @@ -537,7 +561,8 @@ def samefile(self, other_path): st.st_dev == other_st.st_dev) def _samefile_safe(self, other_path): - """Like samefile(), but returns False rather than raising OSError. + """ + Like samefile(), but returns False rather than raising OSError. """ try: return self.samefile(other_path) @@ -794,29 +819,90 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): def copy(self, target, follow_symlinks=True): """ - Copy this file and its metadata to the given target. Returns the path - of the new file. - - If this file is a symlink and *follow_symlinks* is true (the default), - the symlink's target is copied. Otherwise, the symlink is recreated at - the target. + Copy the contents of this file to the given target. If this file is a + symlink and follow_symlinks is false, a symlink will be created at the + target. """ if not isinstance(target, PathBase): target = self.with_segments(target) - if target.is_dir(): - target /= self.name if self._samefile_safe(target): raise OSError(f"{self!r} and {target!r} are the same file") if not follow_symlinks and self.is_symlink(): target.symlink_to(self.readlink()) - return target - - with self.open('rb') as f_source: - with target.open('wb') as f_target: - _fastcopy(f_source, f_target) - - # FIXME: how do we copy metadata between PathBase instances? - return target + return + with self.open('rb') as source_f: + try: + with target.open('wb') as target_f: + try: + # Use fast FD-based implementation if file objects are + # backed by FDs, and the OS supports it. + copyfd = self._copyfd + source_fd = source_f.fileno() + target_fd = target_f.fileno() + except Exception: + # Otherwise use file objects' read() and write(). + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) + else: + try: + copyfd(source_fd, target_fd) + except OSError as err: + # Produce more useful error messages. + err.filename = str(self) + err.filename2 = str(target) + raise err + except IsADirectoryError as e: + if not target.exists(): + # Raise a less confusing exception. + raise FileNotFoundError( + f'Directory does not exist: {target}') from e + else: + raise + + if hasattr(os, 'copy_file_range'): + @staticmethod + def _copyfd(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent + + elif hasattr(os, 'sendfile'): + @staticmethod + def _copyfd(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent + + elif hasattr(posix, '_fcopyfile'): + @staticmethod + def _copyfd(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). + """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) def rename(self, target): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 7c701a3dc1832b..dcad491f60960b 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -3,7 +3,6 @@ import operator import os import posixpath -import shutil import sys from glob import _StringGlobber from itertools import chain @@ -17,6 +16,10 @@ import grp except ImportError: grp = None +try: + import _winapi +except ImportError: + _winapi = None from ._abc import UnsupportedOperation, PurePathBase, PathBase @@ -781,23 +784,18 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not exist_ok or not self.is_dir(): raise - def copy(self, target, follow_symlinks=True): - """ - Copy this file and its metadata to the given target. Returns the path - of the new file. + if hasattr(_winapi, 'CopyFile2'): + def copy(self, target, follow_symlinks=True): + """ + Copy the contents of this file to the given target. If this file is a + symlink and follow_symlinks is false, a symlink will be created at the + target. + """ + if isinstance(target, PathBase) and not isinstance(target, Path): + return PathBase.copy(self, target, follow_symlinks=follow_symlinks) - If this file is a symlink and *follow_symlinks* is true (the default), - the symlink's target is copied. Otherwise, the symlink is recreated at - the target. - """ - if not isinstance(target, PathBase): - target = self.with_segments(target) - if isinstance(target, Path): - # The source and target are *local* paths, so use shutil.copy2() - # to efficiently copy data and metadata using available OS APIs. - target = shutil.copy2(self, target, follow_symlinks=follow_symlinks) - return self.with_segments(target) - return PathBase.copy(self, target, follow_symlinks=follow_symlinks) + flags = 0 if follow_symlinks else _winapi.COPY_FILE_COPY_SYMLINK + _winapi.CopyFile2(os.fspath(self), os.fspath(target), flags) def chmod(self, mode, *, follow_symlinks=True): """ diff --git a/Lib/shutil.py b/Lib/shutil.py index 6b8860c58ee1a7..b0d49e98cfe5f9 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -89,29 +89,6 @@ class _GiveupOnFastCopy(Exception): file copy when fast-copy functions fail to do so. """ -def _fastcopy(fsrc, fdst, file_size=0): - # macOS - if _HAS_FCOPYFILE: - try: - _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) - return - except _GiveupOnFastCopy: - pass - # Linux - elif _USE_CP_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return - except _GiveupOnFastCopy: - pass - # Windows, see: - # https://github.com/python/cpython/pull/7160#discussion_r195405230 - elif _WINDOWS and file_size > 0: - _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) - return - - copyfileobj(fsrc, fdst) - def _fastcopy_fcopyfile(fsrc, fdst, flags): """Copy a regular file content or metadata by using high-performance fcopyfile(3) syscall (macOS). @@ -283,7 +260,27 @@ def copyfile(src, dst, *, follow_symlinks=True): with open(src, 'rb') as fsrc: try: with open(dst, 'wb') as fdst: - _fastcopy(fsrc, fdst, file_size) + # macOS + if _HAS_FCOPYFILE: + try: + _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) + return dst + except _GiveupOnFastCopy: + pass + # Linux + elif _USE_CP_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + # Windows, see: + # https://github.com/python/cpython/pull/7160#discussion_r195405230 + elif _WINDOWS and file_size > 0: + _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) + return dst + + copyfileobj(fsrc, fdst) # Issue 43219, raise a less confusing exception except IsADirectoryError as e: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ec779850fe5c38..3df354eb25a58c 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -810,73 +810,6 @@ def test_hardlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.hardlink_to(p) - @unittest.skipUnless(hasattr(os, 'utime'), 'requires os.utime') - def test_copy(self): - base = self.cls(self.base) - source = base / 'fileA' - source_stat = source.stat() - target = source.copy(base / 'copyA') - target_stat = target.stat() - self.assertTrue(target.exists()) - self.assertEqual(source_stat.st_mode, target_stat.st_mode) - self.assertEqual(source.read_text(), target.read_text()) - for attr in 'st_atime', 'st_mtime': - # The modification times may be truncated in the new file. - self.assertLessEqual(getattr(source_stat, attr), - getattr(target_stat, attr) + 1) - if hasattr(os, 'chflags') and hasattr(source_stat, 'st_flags'): - self.assertEqual(getattr(source_stat, 'st_flags'), - getattr(target_stat, 'st_flags')) - - @needs_symlinks - def test_copy_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkA' - if hasattr(os, 'lchmod'): - os.lchmod(source, stat.S_IRWXU | stat.S_IRWXO) - if hasattr(os, 'lchflags') and hasattr(stat, 'UF_NODUMP'): - os.lchflags(source, stat.UF_NODUMP) - source_stat = source.stat() - source_lstat = source.lstat() - target = source.copy(base / 'copyA', follow_symlinks=False) - target_lstat = target.lstat() - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - if os.utime in os.supports_follow_symlinks: - for attr in 'st_atime', 'st_mtime': - # The modification times may be truncated in the new file. - self.assertLessEqual(getattr(source_lstat, attr), - getattr(target_lstat, attr) + 1) - if hasattr(os, 'lchmod'): - self.assertEqual(source_lstat.st_mode, target_lstat.st_mode) - self.assertNotEqual(source_stat.st_mode, target_lstat.st_mode) - if hasattr(os, 'lchflags') and hasattr(source_lstat, 'st_flags'): - self.assertEqual(source_lstat.st_flags, target_lstat.st_flags) - - @os_helper.skip_unless_xattr - def test_copy_xattr(self): - base = self.cls(self.base) - source = base / 'fileA' - os.setxattr(source, 'user.foo', b'42') - target = source.copy(base / 'copyA') - self.assertEqual( - os.getxattr(source, 'user.foo'), - os.getxattr(target, 'user.foo')) - - def test_copy_dir(self): - base = self.cls(self.base) - source = base / 'dirA' - target = base / 'copyA' - if sys.platform == "win32": - err = PermissionError - else: - err = IsADirectoryError - with self.assertRaises(err): - source.copy(target) - with self.assertRaises(err): - source.copy(target / 'does_not_exist/') - def test_rename(self): P = self.cls(self.base) p = P / 'fileA' diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7b1f2a314d622a..dbc892aa188b62 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1696,46 +1696,68 @@ def test_write_text_with_newlines(self): self.assertEqual((p / 'fileA').read_bytes(), b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq') - def test_copy(self): + def test_copy_file(self): base = self.cls(self.base) source = base / 'fileA' - target = source.copy(base / 'copyA') + target = base / 'copyA' + source.copy(target) self.assertTrue(target.exists()) - self.assertEqual(source.stat().st_mode, target.stat().st_mode) self.assertEqual(source.read_text(), target.read_text()) + def test_copy_directory(self): + base = self.cls(self.base) + source = base / 'dirA' + target = base / 'copyA' + with self.assertRaises(IsADirectoryError): + source.copy(target) + @needs_symlinks - def test_copy_follow_symlinks_true(self): + def test_copy_symlink_follow_symlinks_true(self): base = self.cls(self.base) source = base / 'linkA' - target = source.copy(base / 'copyA') + target = base / 'copyA' + source.copy(target) self.assertTrue(target.exists()) self.assertFalse(target.is_symlink()) self.assertEqual(source.read_text(), target.read_text()) @needs_symlinks - def test_copy_follow_symlinks_false(self): + def test_copy_symlink_follow_symlinks_false(self): base = self.cls(self.base) source = base / 'linkA' - target = source.copy(base / 'copyA', follow_symlinks=False) + target = base / 'copyA' + source.copy(target, follow_symlinks=False) self.assertTrue(target.exists()) self.assertTrue(target.is_symlink()) self.assertEqual(source.readlink(), target.readlink()) - def test_copy_return_value(self): + def test_copy_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' - self.assertEqual(source.copy(base / 'dirA'), base / 'dirA' / 'fileA') - self.assertEqual(source.copy(base / 'dirA' / 'copyA'), base / 'dirA' / 'copyA') + target = base / 'dirB' / 'fileB' + source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(source.read_text(), target.read_text()) - def test_copy_dir(self): + def test_copy_to_existing_directory(self): base = self.cls(self.base) - source = base / 'dirA' - target = base / 'copyA' - with self.assertRaises(OSError): + source = base / 'fileA' + target = base / 'dirA' + with self.assertRaises(IsADirectoryError): source.copy(target) - with self.assertRaises(OSError): - source.copy(target / 'does_not_exist/') + + @needs_symlinks + def test_copy_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + source.copy(target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) def test_iterdir(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst index f8e3465ae0cd78..c2953c65b2720f 100644 --- a/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst +++ b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst @@ -1,2 +1,2 @@ -Add :meth:`pathlib.Path.copy`, which copies a file to a target file or -directory, like :func:`shutil.copy2`. +Add :meth:`pathlib.Path.copy`, which copies the content of one file to another, +like :func:`shutil.copyfile`. From cbf61748312bcef50158ca33cc3cc8276b65c284 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jun 2024 08:39:50 +0100 Subject: [PATCH 06/16] Fix tests, add FICLONE support --- Lib/pathlib/_abc.py | 150 +++++++++++++--------- Lib/test/test_pathlib/test_pathlib_abc.py | 4 +- 2 files changed, 92 insertions(+), 62 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 7fe50fe8e0dc5b..21fd6e4cc6d602 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -15,8 +15,12 @@ import os import sys from glob import _Globber, _no_recurse_symlinks -from errno import ENOTDIR, ELOOP +from errno import ENOTDIR, ELOOP, EBADF, EOPNOTSUPP, ETXTBSY, EXDEV from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +try: + import fcntl +except ImportError: + fcntl = None try: import posix except ImportError: @@ -50,6 +54,69 @@ def _get_copy_blocksize(infd): return blocksize +if fcntl and hasattr(fcntl, 'FICLONE'): + def _clonefd(source_fd, target_fd): + """ + Perform a lightweight copy of two files, where the data blocks are + copied only when modified. This is known as Copy on Write (CoW), + instantaneous copy or reflink. + """ + fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) +else: + _clonefd = None + + +if posix and hasattr(posix, '_fcopyfile'): + def _copyfd(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). + """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) +elif hasattr(os, 'copy_file_range'): + def _copyfd(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent +elif hasattr(os, 'sendfile'): + def _copyfd(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent +else: + _copyfd = None + + +def _copyfileobj(source_f, target_f): + """ + Copy data from file-like object source_f to file-like object target_f. + """ + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) + + class UnsupportedOperation(NotImplementedError): """An exception that is raised when an unsupported operation is called on a path object. @@ -834,25 +901,31 @@ def copy(self, target, follow_symlinks=True): try: with target.open('wb') as target_f: try: - # Use fast FD-based implementation if file objects are - # backed by FDs, and the OS supports it. - copyfd = self._copyfd source_fd = source_f.fileno() target_fd = target_f.fileno() except Exception: - # Otherwise use file objects' read() and write(). - read_source = source_f.read - write_target = target_f.write - while buf := read_source(1024 * 1024): - write_target(buf) - else: - try: - copyfd(source_fd, target_fd) - except OSError as err: - # Produce more useful error messages. - err.filename = str(self) - err.filename2 = str(target) - raise err + return _copyfileobj(source_f, target_f) + try: + # Use OS copy-on-write where available. + if _clonefd: + try: + return _clonefd(source_fd, target_fd) + except OSError as err: + if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): + raise err + + # Use OS copy where available. + if _copyfd: + return _copyfd(source_fd, target_fd) + + # Last resort: copy between file objects. + return _copyfileobj(source_f, target_f) + except OSError as err: + # Produce more useful error messages. + err.filename = str(self) + err.filename2 = str(target) + raise err + except IsADirectoryError as e: if not target.exists(): # Raise a less confusing exception. @@ -861,49 +934,6 @@ def copy(self, target, follow_symlinks=True): else: raise - if hasattr(os, 'copy_file_range'): - @staticmethod - def _copyfd(source_fd, target_fd): - """ - Copy data from one regular mmap-like fd to another by using a - high-performance copy_file_range(2) syscall that gives filesystems - an opportunity to implement the use of reflinks or server-side - copy. - This should work on Linux >= 4.5 only. - """ - blocksize = _get_copy_blocksize(source_fd) - offset = 0 - while True: - sent = os.copy_file_range(source_fd, target_fd, blocksize, - offset_dst=offset) - if sent == 0: - break # EOF - offset += sent - - elif hasattr(os, 'sendfile'): - @staticmethod - def _copyfd(source_fd, target_fd): - """Copy data from one regular mmap-like fd to another by using - high-performance sendfile(2) syscall. - This should work on Linux >= 2.6.33 only. - """ - blocksize = _get_copy_blocksize(source_fd) - offset = 0 - while True: - sent = os.sendfile(target_fd, source_fd, offset, blocksize) - if sent == 0: - break # EOF - offset += sent - - elif hasattr(posix, '_fcopyfile'): - @staticmethod - def _copyfd(source_fd, target_fd): - """ - Copy a regular file content using high-performance fcopyfile(3) - syscall (macOS). - """ - posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) - def rename(self, target): """ Rename this path to the target path. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index dbc892aa188b62..f1bcba1705e304 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1708,7 +1708,7 @@ def test_copy_directory(self): base = self.cls(self.base) source = base / 'dirA' target = base / 'copyA' - with self.assertRaises(IsADirectoryError): + with self.assertRaises(OSError): source.copy(target) @needs_symlinks @@ -1743,7 +1743,7 @@ def test_copy_to_existing_directory(self): base = self.cls(self.base) source = base / 'fileA' target = base / 'dirA' - with self.assertRaises(IsADirectoryError): + with self.assertRaises(OSError): source.copy(target) @needs_symlinks From 738f8ed91dbf87249dfb1cf9631b2eb1688f7364 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 4 Jun 2024 16:48:27 +0100 Subject: [PATCH 07/16] Move to `pathlib._os`. --- Lib/pathlib/_abc.py | 105 +++----------------------------------------- Lib/pathlib/_os.py | 96 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 98 deletions(-) create mode 100644 Lib/pathlib/_os.py diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 21fd6e4cc6d602..fd9eb41532a183 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,19 +12,10 @@ """ import functools -import os -import sys from glob import _Globber, _no_recurse_symlinks from errno import ENOTDIR, ELOOP, EBADF, EOPNOTSUPP, ETXTBSY, EXDEV from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -try: - import fcntl -except ImportError: - fcntl = None -try: - import posix -except ImportError: - posix = None +from ._os import clonefd, copyfd, copyfileobj __all__ = ["UnsupportedOperation"] @@ -35,88 +26,6 @@ def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' -def _get_copy_blocksize(infd): - """Determine blocksize for fastcopying on Linux. - Hopefully the whole file will be copied in a single call. - The copying itself should be performed in a loop 'till EOF is - reached (0 return) so a blocksize smaller or bigger than the actual - file size should not make any difference, also in case the file - content changes while being copied. - """ - try: - blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB - except OSError: - blocksize = 2 ** 27 # 128 MiB - # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, - # see gh-82500. - if sys.maxsize < 2 ** 32: - blocksize = min(blocksize, 2 ** 30) - return blocksize - - -if fcntl and hasattr(fcntl, 'FICLONE'): - def _clonefd(source_fd, target_fd): - """ - Perform a lightweight copy of two files, where the data blocks are - copied only when modified. This is known as Copy on Write (CoW), - instantaneous copy or reflink. - """ - fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) -else: - _clonefd = None - - -if posix and hasattr(posix, '_fcopyfile'): - def _copyfd(source_fd, target_fd): - """ - Copy a regular file content using high-performance fcopyfile(3) - syscall (macOS). - """ - posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) -elif hasattr(os, 'copy_file_range'): - def _copyfd(source_fd, target_fd): - """ - Copy data from one regular mmap-like fd to another by using a - high-performance copy_file_range(2) syscall that gives filesystems - an opportunity to implement the use of reflinks or server-side - copy. - This should work on Linux >= 4.5 only. - """ - blocksize = _get_copy_blocksize(source_fd) - offset = 0 - while True: - sent = os.copy_file_range(source_fd, target_fd, blocksize, - offset_dst=offset) - if sent == 0: - break # EOF - offset += sent -elif hasattr(os, 'sendfile'): - def _copyfd(source_fd, target_fd): - """Copy data from one regular mmap-like fd to another by using - high-performance sendfile(2) syscall. - This should work on Linux >= 2.6.33 only. - """ - blocksize = _get_copy_blocksize(source_fd) - offset = 0 - while True: - sent = os.sendfile(target_fd, source_fd, offset, blocksize) - if sent == 0: - break # EOF - offset += sent -else: - _copyfd = None - - -def _copyfileobj(source_f, target_f): - """ - Copy data from file-like object source_f to file-like object target_f. - """ - read_source = source_f.read - write_target = target_f.write - while buf := read_source(1024 * 1024): - write_target(buf) - - class UnsupportedOperation(NotImplementedError): """An exception that is raised when an unsupported operation is called on a path object. @@ -904,22 +813,22 @@ def copy(self, target, follow_symlinks=True): source_fd = source_f.fileno() target_fd = target_f.fileno() except Exception: - return _copyfileobj(source_f, target_f) + return copyfileobj(source_f, target_f) try: # Use OS copy-on-write where available. - if _clonefd: + if clonefd: try: - return _clonefd(source_fd, target_fd) + return clonefd(source_fd, target_fd) except OSError as err: if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): raise err # Use OS copy where available. - if _copyfd: - return _copyfd(source_fd, target_fd) + if copyfd: + return copyfd(source_fd, target_fd) # Last resort: copy between file objects. - return _copyfileobj(source_f, target_f) + return copyfileobj(source_f, target_f) except OSError as err: # Produce more useful error messages. err.filename = str(self) diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py new file mode 100644 index 00000000000000..25c93463d848ec --- /dev/null +++ b/Lib/pathlib/_os.py @@ -0,0 +1,96 @@ +""" +Low-level OS functionality wrappers used by pathlib. +""" + +import os +import sys +try: + import fcntl +except ImportError: + fcntl = None +try: + import posix +except ImportError: + posix = None + + +def get_copy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + + +if fcntl and hasattr(fcntl, 'FICLONE'): + def clonefd(source_fd, target_fd): + """ + Perform a lightweight copy of two files, where the data blocks are + copied only when modified. This is known as Copy on Write (CoW), + instantaneous copy or reflink. + """ + fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) +else: + clonefd = None + + +if posix and hasattr(posix, '_fcopyfile'): + def copyfd(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). + """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) +elif hasattr(os, 'copy_file_range'): + def copyfd(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent +elif hasattr(os, 'sendfile'): + def copyfd(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent +else: + copyfd = None + + +def copyfileobj(source_f, target_f): + """ + Copy data from file-like object source_f to file-like object target_f. + """ + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) From 83cb4dcaa556c2e4b8e79ff6a6ee205ade531adb Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 4 Jun 2024 16:56:44 +0100 Subject: [PATCH 08/16] Move a bit more code into _os. --- Lib/pathlib/_abc.py | 31 +++---------------------------- Lib/pathlib/_os.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index fd9eb41532a183..973fcd47878fb7 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -13,9 +13,9 @@ import functools from glob import _Globber, _no_recurse_symlinks -from errno import ENOTDIR, ELOOP, EBADF, EOPNOTSUPP, ETXTBSY, EXDEV +from errno import ENOTDIR, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from ._os import clonefd, copyfd, copyfileobj +from ._os import copyfileobj __all__ = ["UnsupportedOperation"] @@ -809,32 +809,7 @@ def copy(self, target, follow_symlinks=True): with self.open('rb') as source_f: try: with target.open('wb') as target_f: - try: - source_fd = source_f.fileno() - target_fd = target_f.fileno() - except Exception: - return copyfileobj(source_f, target_f) - try: - # Use OS copy-on-write where available. - if clonefd: - try: - return clonefd(source_fd, target_fd) - except OSError as err: - if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): - raise err - - # Use OS copy where available. - if copyfd: - return copyfd(source_fd, target_fd) - - # Last resort: copy between file objects. - return copyfileobj(source_f, target_f) - except OSError as err: - # Produce more useful error messages. - err.filename = str(self) - err.filename2 = str(target) - raise err - + copyfileobj(source_f, target_f) except IsADirectoryError as e: if not target.exists(): # Raise a less confusing exception. diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 25c93463d848ec..ebfd499bd5443f 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -2,6 +2,7 @@ Low-level OS functionality wrappers used by pathlib. """ +from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV import os import sys try: @@ -90,6 +91,33 @@ def copyfileobj(source_f, target_f): """ Copy data from file-like object source_f to file-like object target_f. """ + try: + source_fd = source_f.fileno() + target_fd = target_f.fileno() + except Exception: + pass # Fall through to generic code. + else: + try: + # Use OS copy-on-write where available. + if clonefd: + try: + clonefd(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): + raise err + + # Use OS copy where available. + if copyfd: + copyfd(source_fd, target_fd) + return + except OSError as err: + # Produce more useful error messages. + err.filename = source_f.name + err.filename2 = target_f.name + raise err + + # Last resort: copy with fileobj read() and write(). read_source = source_f.read write_target = target_f.write while buf := read_source(1024 * 1024): From 0aceea237fe3ad79bd00fa53da1939b0bf320b9f Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 4 Jun 2024 17:44:25 +0100 Subject: [PATCH 09/16] Handle classes that subclass PathBase and os.PathLike but not Path. --- Lib/pathlib/_local.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index dcad491f60960b..37935e724de1b7 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -791,11 +791,17 @@ def copy(self, target, follow_symlinks=True): symlink and follow_symlinks is false, a symlink will be created at the target. """ - if isinstance(target, PathBase) and not isinstance(target, Path): - return PathBase.copy(self, target, follow_symlinks=follow_symlinks) + try: + target = os.fspath(target) + except TypeError: + if isinstance(target, PathBase): + # Target is an instance of PathBase but not os.PathLike. + # Use generic implementation from PathBase. + return PathBase.copy(self, target, follow_symlinks=follow_symlinks) + raise flags = 0 if follow_symlinks else _winapi.COPY_FILE_COPY_SYMLINK - _winapi.CopyFile2(os.fspath(self), os.fspath(target), flags) + _winapi.CopyFile2(os.fspath(self), target, flags) def chmod(self, mode, *, follow_symlinks=True): """ From eec9e7f1d31048b450b44e94287dc6550d61c077 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 4 Jun 2024 18:18:38 +0100 Subject: [PATCH 10/16] Reword --- Doc/library/pathlib.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 0252c923fe8458..c44eab7b922e50 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1486,8 +1486,11 @@ example because the path doesn't exist). will be created as a symbolic link. If *follow_symlinks* is true and this file is a symbolic link, *target* will be a copy of the symlink target. - This method doesn't guarantee whether permissions, ownership, and other - metadata is copied; it may vary by operating system and Python version. + .. note:: + This method uses operating system functionality to copy file content + efficiently. The OS might also copy some metadata, such as file + permissions. After the copy is complete, users may wish to call + :meth:`Path.chmod` to set the permissions of the target file. .. versionadded:: 3.14 From fddcd73a81446bbd33326dc3da8c0d88423bf272 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 5 Jun 2024 18:38:29 +0100 Subject: [PATCH 11/16] Add test for directory symlinks --- Lib/test/test_pathlib/test_pathlib_abc.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index f1bcba1705e304..0d83f485df26b4 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1731,6 +1731,16 @@ def test_copy_symlink_follow_symlinks_false(self): self.assertTrue(target.is_symlink()) self.assertEqual(source.readlink(), target.readlink()) + @needs_symlinks + def test_copy_directory_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'copyA' + source.copy(target, follow_symlinks=False) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + def test_copy_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' From c683c2dfa72330801fa35de355020f1fa7807126 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 5 Jun 2024 19:43:24 +0100 Subject: [PATCH 12/16] Fix handling of symlinks to directories on Windows. --- Lib/pathlib/_local.py | 11 +++-------- Lib/pathlib/_os.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 37935e724de1b7..0105ea3042422e 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -16,12 +16,9 @@ import grp except ImportError: grp = None -try: - import _winapi -except ImportError: - _winapi = None from ._abc import UnsupportedOperation, PurePathBase, PathBase +from ._os import copyfile __all__ = [ @@ -784,7 +781,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not exist_ok or not self.is_dir(): raise - if hasattr(_winapi, 'CopyFile2'): + if copyfile: def copy(self, target, follow_symlinks=True): """ Copy the contents of this file to the given target. If this file is a @@ -799,9 +796,7 @@ def copy(self, target, follow_symlinks=True): # Use generic implementation from PathBase. return PathBase.copy(self, target, follow_symlinks=follow_symlinks) raise - - flags = 0 if follow_symlinks else _winapi.COPY_FILE_COPY_SYMLINK - _winapi.CopyFile2(os.fspath(self), target, flags) + copyfile(os.fspath(self), target, follow_symlinks) def chmod(self, mode, *, follow_symlinks=True): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index ebfd499bd5443f..d45db5b7766ab4 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -4,6 +4,7 @@ from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV import os +import stat import sys try: import fcntl @@ -13,6 +14,10 @@ import posix except ImportError: posix = None +try: + import _winapi +except ImportError: + _winapi = None def get_copy_blocksize(infd): @@ -87,6 +92,35 @@ def copyfd(source_fd, target_fd): copyfd = None +if _winapi and hasattr(_winapi, 'CopyFile2'): + def is_dirlink(path): + try: + st = os.lstat(path) + except OSError: + return False + return (st.st_file_attributes & stat.FILE_ATTRIBUTE_DIRECTORY and + st.st_reparse_tag == stat.IO_REPARSE_TAG_SYMLINK) + + def copyfile(source, target, follow_symlinks): + """ + Copy from one file to another using CopyFile2 (Windows only). + """ + if follow_symlinks: + flags = 0 + else: + flags = _winapi.COPY_FILE_COPY_SYMLINK + try: + _winapi.CopyFile2(source, target, flags) + except OSError as err: + # Check for ERROR_ACCESS_DENIED + if err.winerror != 5 or not is_dirlink(source): + raise err + flags |= _winapi.COPY_FILE_DIRECTORY + _winapi.CopyFile2(source, target, flags) +else: + copyfile = None + + def copyfileobj(source_f, target_f): """ Copy data from file-like object source_f to file-like object target_f. From 0be714f0931cc418508366984aa20bda87c22385 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 5 Jun 2024 20:09:47 +0100 Subject: [PATCH 13/16] Expose _winapi.COPY_FILE_DIRECTORY --- Modules/_winapi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 8794d568e92a36..c90d6c5a9ef3ef 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -3166,6 +3166,11 @@ static int winapi_exec(PyObject *m) #define COPY_FILE_REQUEST_COMPRESSED_TRAFFIC 0x10000000 #endif WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_COMPRESSED_TRAFFIC); +#ifndef COPY_FILE_DIRECTORY + // Only defined in newer WinSDKs + #define COPY_FILE_DIRECTORY 0x00000080 +#endif + WINAPI_CONSTANT(F_DWORD, COPY_FILE_DIRECTORY); WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_STARTED); WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_FINISHED); From 5169b9a0161536d6b921bd5073e0bfabba1eb66b Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 5 Jun 2024 21:03:52 +0100 Subject: [PATCH 14/16] Test copying empty file. --- Lib/test/test_pathlib/test_pathlib_abc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 0d83f485df26b4..fee2b083712bfb 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1769,6 +1769,15 @@ def test_copy_to_existing_symlink(self): self.assertFalse(real_target.is_symlink()) self.assertEqual(source.read_text(), real_target.read_text()) + def test_copy_empty(self): + base = self.cls(self.base) + source = base / 'empty' + target = base / 'copyA' + source.write_bytes(b'') + source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(target.read_bytes(), b'') + def test_iterdir(self): P = self.cls p = P(self.base) From 7c9c893f93036e1b332934b83e11b5e036091a32 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 6 Jun 2024 20:21:18 +0100 Subject: [PATCH 15/16] Apply suggestions from code review Co-authored-by: Eryk Sun --- Lib/pathlib/_os.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index d45db5b7766ab4..ca4af8df15e06d 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -96,7 +96,7 @@ def copyfd(source_fd, target_fd): def is_dirlink(path): try: st = os.lstat(path) - except OSError: + except (OSError, ValueError): return False return (st.st_file_attributes & stat.FILE_ATTRIBUTE_DIRECTORY and st.st_reparse_tag == stat.IO_REPARSE_TAG_SYMLINK) @@ -111,10 +111,11 @@ def copyfile(source, target, follow_symlinks): flags = _winapi.COPY_FILE_COPY_SYMLINK try: _winapi.CopyFile2(source, target, flags) + return except OSError as err: # Check for ERROR_ACCESS_DENIED if err.winerror != 5 or not is_dirlink(source): - raise err + raise flags |= _winapi.COPY_FILE_DIRECTORY _winapi.CopyFile2(source, target, flags) else: From 0e1e4f12da3902166c5d59af9ff8f30b57c83439 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 6 Jun 2024 20:39:31 +0100 Subject: [PATCH 16/16] Drop `follow_symlinks` argument for now. --- Doc/library/pathlib.rst | 6 +----- Lib/pathlib/_abc.py | 9 ++------ Lib/pathlib/_local.py | 10 ++++----- Lib/pathlib/_os.py | 25 ++--------------------- Lib/test/test_pathlib/test_pathlib_abc.py | 22 +------------------- Modules/_winapi.c | 5 ----- 6 files changed, 10 insertions(+), 67 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index c44eab7b922e50..8ac5aeb3677ec7 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1477,15 +1477,11 @@ example because the path doesn't exist). available. In previous versions, :exc:`NotImplementedError` was raised. -.. method:: Path.copy(target, *, follow_symlinks=True) +.. method:: Path.copy(target) Copy the contents of this file to the *target* file. If *target* specifies a file that already exists, it will be replaced. - If *follow_symlinks* is false, and this file is a symbolic link, *target* - will be created as a symbolic link. If *follow_symlinks* is true and this - file is a symbolic link, *target* will be a copy of the symlink target. - .. note:: This method uses operating system functionality to copy file content efficiently. The OS might also copy some metadata, such as file diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 973fcd47878fb7..11acee07adad1b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -793,19 +793,14 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ raise UnsupportedOperation(self._unsupported_msg('mkdir()')) - def copy(self, target, follow_symlinks=True): + def copy(self, target): """ - Copy the contents of this file to the given target. If this file is a - symlink and follow_symlinks is false, a symlink will be created at the - target. + Copy the contents of this file to the given target. """ if not isinstance(target, PathBase): target = self.with_segments(target) if self._samefile_safe(target): raise OSError(f"{self!r} and {target!r} are the same file") - if not follow_symlinks and self.is_symlink(): - target.symlink_to(self.readlink()) - return with self.open('rb') as source_f: try: with target.open('wb') as target_f: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 0105ea3042422e..cffed10dbd1207 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -782,11 +782,9 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): raise if copyfile: - def copy(self, target, follow_symlinks=True): + def copy(self, target): """ - Copy the contents of this file to the given target. If this file is a - symlink and follow_symlinks is false, a symlink will be created at the - target. + Copy the contents of this file to the given target. """ try: target = os.fspath(target) @@ -794,9 +792,9 @@ def copy(self, target, follow_symlinks=True): if isinstance(target, PathBase): # Target is an instance of PathBase but not os.PathLike. # Use generic implementation from PathBase. - return PathBase.copy(self, target, follow_symlinks=follow_symlinks) + return PathBase.copy(self, target) raise - copyfile(os.fspath(self), target, follow_symlinks) + copyfile(os.fspath(self), target) def chmod(self, mode, *, follow_symlinks=True): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index ca4af8df15e06d..1771d54e4167c1 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -4,7 +4,6 @@ from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV import os -import stat import sys try: import fcntl @@ -93,31 +92,11 @@ def copyfd(source_fd, target_fd): if _winapi and hasattr(_winapi, 'CopyFile2'): - def is_dirlink(path): - try: - st = os.lstat(path) - except (OSError, ValueError): - return False - return (st.st_file_attributes & stat.FILE_ATTRIBUTE_DIRECTORY and - st.st_reparse_tag == stat.IO_REPARSE_TAG_SYMLINK) - - def copyfile(source, target, follow_symlinks): + def copyfile(source, target): """ Copy from one file to another using CopyFile2 (Windows only). """ - if follow_symlinks: - flags = 0 - else: - flags = _winapi.COPY_FILE_COPY_SYMLINK - try: - _winapi.CopyFile2(source, target, flags) - return - except OSError as err: - # Check for ERROR_ACCESS_DENIED - if err.winerror != 5 or not is_dirlink(source): - raise - flags |= _winapi.COPY_FILE_DIRECTORY - _winapi.CopyFile2(source, target, flags) + _winapi.CopyFile2(source, target, 0) else: copyfile = None diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index fee2b083712bfb..fd71284159d5c0 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1712,7 +1712,7 @@ def test_copy_directory(self): source.copy(target) @needs_symlinks - def test_copy_symlink_follow_symlinks_true(self): + def test_copy_symlink(self): base = self.cls(self.base) source = base / 'linkA' target = base / 'copyA' @@ -1721,26 +1721,6 @@ def test_copy_symlink_follow_symlinks_true(self): self.assertFalse(target.is_symlink()) self.assertEqual(source.read_text(), target.read_text()) - @needs_symlinks - def test_copy_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - source.copy(target, follow_symlinks=False) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_directory_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base / 'copyA' - source.copy(target, follow_symlinks=False) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - def test_copy_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' diff --git a/Modules/_winapi.c b/Modules/_winapi.c index c90d6c5a9ef3ef..8794d568e92a36 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -3166,11 +3166,6 @@ static int winapi_exec(PyObject *m) #define COPY_FILE_REQUEST_COMPRESSED_TRAFFIC 0x10000000 #endif WINAPI_CONSTANT(F_DWORD, COPY_FILE_REQUEST_COMPRESSED_TRAFFIC); -#ifndef COPY_FILE_DIRECTORY - // Only defined in newer WinSDKs - #define COPY_FILE_DIRECTORY 0x00000080 -#endif - WINAPI_CONSTANT(F_DWORD, COPY_FILE_DIRECTORY); WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_STARTED); WINAPI_CONSTANT(F_DWORD, COPYFILE2_CALLBACK_CHUNK_FINISHED);