diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py
index 5594806f03..8013f9bb98 100644
--- a/dvc/remote/s3.py
+++ b/dvc/remote/s3.py
@@ -212,6 +212,15 @@ def exists(self, path_info):
         fname = next(self._list_paths(path_info, max_items=1), "")
         return path_info.path == fname or fname.startswith(dir_path.path)
 
+    def makedirs(self, path_info):
+        # We need to support creating empty directories, which means
+        # creating an object with an empty body and a trailing slash `/`.
+        #
+        # We are not creating directory objects for every parent prefix,
+        # as it is not required.
+        dir_path = path_info / ""
+        self.s3.put_object(Bucket=path_info.bucket, Key=dir_path.path, Body="")
+
     def isdir(self, path_info):
         # S3 doesn't have a concept for directories.
         #
@@ -271,4 +280,7 @@ def _generate_download_url(self, path_info, expires=3600):
 
     def walk_files(self, path_info, max_items=None):
         for fname in self._list_paths(path_info, max_items):
+            if fname.endswith("/"):
+                continue
+
             yield path_info.replace(path=fname)
diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_s3.py
index a82d005c4d..49fc3dbb89 100644
--- a/tests/unit/remote/test_s3.py
+++ b/tests/unit/remote/test_s3.py
@@ -79,9 +79,11 @@ def test_walk_files(remote):
         remote.path_info / "data/subdir/1",
         remote.path_info / "data/subdir/2",
         remote.path_info / "data/subdir/3",
+        remote.path_info / "empty_file",
+        remote.path_info / "foo",
     ]
 
-    assert list(remote.walk_files(remote.path_info / "data")) == files
+    assert list(remote.walk_files(remote.path_info)) == files
 
 
 def test_copy_preserve_etag_across_buckets(remote):
@@ -99,3 +101,11 @@ def test_copy_preserve_etag_across_buckets(remote):
     to_etag = RemoteS3.get_etag(s3, "another", "foo")
 
     assert from_etag == to_etag
+
+
+def test_makedirs(remote):
+    empty_dir = remote.path_info / "empty_dir" / ""
+    remote.remove(empty_dir)
+    assert not remote.exists(empty_dir)
+    remote.makedirs(empty_dir)
+    assert remote.exists(empty_dir)