Skip to content

Commit

Permalink
Merge pull request #3093 from iterative/move-fs-utilities
Browse files: browse the repository at this point in the history
Move fs utilities to fs.py from __init__.py
  • Loading branch information
efiop authored Jan 9, 2020
2 parents 238b4e8 + 74850fe commit 095464d
Show file tree
Hide file tree
Showing 17 changed files with 154 additions and 151 deletions.
3 changes: 2 additions & 1 deletion dvc/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from dvc.lock import Lock, LockError
from dvc.repo import Repo
from dvc.scm import SCM
from dvc.utils import env2bool, is_binary, makedirs
from dvc.utils import env2bool, is_binary
from dvc.utils.fs import makedirs


logger = logging.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from dvc.progress import Tqdm
from dvc.remote.slow_link_detection import slow_link_guard
from dvc.state import StateNoop
from dvc.utils import makedirs, relpath, tmp_fname
from dvc.utils.fs import move
from dvc.utils import relpath, tmp_fname
from dvc.utils.fs import move, makedirs
from dvc.utils.http import open_url

logger = logging.getLogger(__name__)
Expand Down
8 changes: 3 additions & 5 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@
from dvc.scheme import Schemes
from dvc.scm.tree import is_working_tree
from dvc.system import System
from dvc.utils import copyfile
from dvc.utils.fs import copyfile
from dvc.utils import file_md5
from dvc.utils import makedirs
from dvc.utils import relpath
from dvc.utils import tmp_fname
from dvc.utils import walk_files
from dvc.compat import fspath_py35
from dvc.utils.fs import move
from dvc.utils.fs import remove
from dvc.utils.fs import move, makedirs, remove
from dvc.utils.fs import walk_files

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, root_dir=None):
from dvc.repo.metrics import Metrics
from dvc.scm.tree import WorkingTree
from dvc.repo.tag import Tag
from dvc.utils import makedirs
from dvc.utils.fs import makedirs

root_dir = self.find_root(root_dir)

Expand Down
53 changes: 0 additions & 53 deletions dvc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,53 +105,6 @@ def dict_md5(d, exclude=()):
return bytes_md5(byts)


def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, showing a progress bar on the fallback path.

    A reflink is attempted first; only when ``System.reflink`` raises
    ``DvcException`` is the file copied chunk by chunk under a progress bar.

    Args:
        src: source file path (converted with ``fspath_py35``).
        dest: destination path; if it is an existing directory, the file is
            written inside it under the basename of ``src``.
        no_progress_bar: passed to the progress bar as ``disable``.
        name: progress bar description; defaults to the basename of ``dest``
            as passed in by the caller.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    # The label is taken before `dest` is possibly redirected into a
    # directory below.
    label = name or os.path.basename(dest)
    size = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        # Reflink failed: copy the bytes manually in fixed-size chunks.
        with Tqdm(
            desc=label, disable=no_progress_bar, total=size, bytes=True
        ) as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # iter() with a sentinel stops at the first empty read (EOF).
                for chunk in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(chunk)
                    pbar.update(len(chunk))


def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally forcing ``mode``.

    Args:
        path: directory to create; converted with ``fspath_py35`` first.
        exist_ok: forwarded unchanged to ``os.makedirs``.
        mode: permission bits wanted on the newly created directories
            (leaf and intermediate), or None to call ``os.makedirs`` with
            its defaults and leave the umask untouched.
    """
    path = fspath_py35(path)

    if mode is None:
        os.makedirs(path, exist_ok=exist_ok)
        return

    # utilize umask to set proper permissions since Python 3.7 the `mode`
    # `makedirs` argument no longer affects the file permission bits of
    # newly-created intermediate-level directories.
    #
    # The mask is derived bitwise rather than as `0o777 - mode`, so a mode
    # carrying bits above 0o777 (setuid/setgid/sticky) can never produce a
    # negative value for os.umask().
    previous_umask = os.umask(0o777 & ~mode)
    try:
        os.makedirs(path, exist_ok=exist_ok)
    finally:
        # Always restore the umask that was in effect before the call.
        os.umask(previous_umask)


def _split(list_to_split, chunk_size):
return [
list_to_split[i : i + chunk_size]
Expand Down Expand Up @@ -278,12 +231,6 @@ def to_yaml_string(data):
return stream.getvalue()


def walk_files(directory):
    """Yield the path of every file found by ``os.walk`` under ``directory``.

    ``directory`` is converted with ``fspath`` before walking; each yielded
    path is the walked directory joined with the file name.
    """
    top = fspath(directory)
    for dirpath, _dirnames, filenames in os.walk(top):
        yield from (os.path.join(dirpath, fname) for fname in filenames)


def colorize(message, color=None):
"""Returns a message in a specified color."""
if not color:
Expand Down
55 changes: 55 additions & 0 deletions dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

logger = logging.getLogger(__name__)

# Number of bytes requested per read by copyfile's manual copy loop.
LOCAL_CHUNK_SIZE = 2 ** 20 # 1 MB


def fs_copy(src, dst):
if os.path.isdir(src):
Expand Down Expand Up @@ -152,3 +154,56 @@ def normalize_path(path):
parent = os.path.join(normalize_path(parent), "")
child = normalize_path(child)
return child != parent and child.startswith(parent)


def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally forcing ``mode``.

    Args:
        path: directory to create; converted with ``fspath_py35`` first.
        exist_ok: forwarded unchanged to ``os.makedirs``.
        mode: permission bits wanted on the newly created directories
            (leaf and intermediate), or None to call ``os.makedirs`` with
            its defaults and leave the umask untouched.
    """
    path = fspath_py35(path)

    if mode is None:
        os.makedirs(path, exist_ok=exist_ok)
        return

    # utilize umask to set proper permissions since Python 3.7 the `mode`
    # `makedirs` argument no longer affects the file permission bits of
    # newly-created intermediate-level directories.
    #
    # The mask is derived bitwise rather than as `0o777 - mode`, so a mode
    # carrying bits above 0o777 (setuid/setgid/sticky) can never produce a
    # negative value for os.umask().
    previous_umask = os.umask(0o777 & ~mode)
    try:
        os.makedirs(path, exist_ok=exist_ok)
    finally:
        # Always restore the umask that was in effect before the call.
        os.umask(previous_umask)


def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, showing a progress bar on the fallback path.

    A reflink is attempted first; only when ``System.reflink`` raises
    ``DvcException`` is the file copied chunk by chunk under a progress bar.

    Args:
        src: source file path (converted with ``fspath_py35``).
        dest: destination path; if it is an existing directory, the file is
            written inside it under the basename of ``src``.
        no_progress_bar: passed to the progress bar as ``disable``.
        name: progress bar description; defaults to the basename of ``dest``
            as passed in by the caller.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    # The label is taken before `dest` is possibly redirected into a
    # directory below.
    label = name or os.path.basename(dest)
    size = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        # Reflink failed: copy the bytes manually in fixed-size chunks.
        with Tqdm(
            desc=label, disable=no_progress_bar, total=size, bytes=True
        ) as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # iter() with a sentinel stops at the first empty read (EOF).
                for chunk in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(chunk)
                    pbar.update(len(chunk))


def walk_files(directory):
    """Yield the path of every file found by ``os.walk`` under ``directory``.

    ``directory`` is converted with ``fspath`` before walking; each yielded
    path is the walked directory joined with the file name.
    """
    top = fspath(directory)
    for dirpath, _dirnames, filenames in os.walk(top):
        yield from (os.path.join(dirpath, fname) for fname in filenames)
2 changes: 1 addition & 1 deletion tests/dir_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import pytest
from funcy.py3 import lmap, retry

from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath, fspath_py35


Expand Down
3 changes: 2 additions & 1 deletion tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from dvc.stage import StageFileBadNameError
from dvc.stage import StageFileDoesNotExistError
from dvc.system import System
from dvc.utils import relpath, walk_files
from dvc.utils import relpath
from dvc.utils.fs import walk_files
from dvc.utils.stage import dump_stage_file
from dvc.utils.stage import load_stage_file
from tests.basic_env import TestDvc
Expand Down
36 changes: 36 additions & 0 deletions tests/func/test_fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import stat

import pytest

from dvc.utils.fs import makedirs, copyfile


@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.")
def test_makedirs_permissions(tmp_dir):
    """Leaf and intermediate directories both end up with the given mode."""
    expected_mode = 0o755
    parent = "тестовая-директория"
    leaf = os.path.join(parent, "data")

    # Precondition: the parent has to be created by makedirs itself.
    assert not os.path.exists(parent)

    makedirs(leaf, mode=expected_mode)

    for created in (leaf, parent):
        assert stat.S_IMODE(os.stat(created).st_mode) == expected_mode


def test_copyfile(tmp_dir):
    """copyfile accepts a file path or an existing directory as destination."""
    contents = "file1contents"
    source_name = "file1"
    target_file = "file2"
    target_dir = "testdir"

    tmp_dir.gen(source_name, contents)
    os.mkdir(target_dir)

    # Destination given as a plain file path.
    copyfile(source_name, target_file)
    assert (tmp_dir / target_file).read_text() == contents

    # Destination given as a directory: the copy keeps the source's name.
    copyfile(source_name, target_dir)
    assert (tmp_dir / target_dir / source_name).read_text() == contents
2 changes: 1 addition & 1 deletion tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath
from tests.utils import trees_equal

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_get_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from dvc.repo import Repo
from dvc.utils import makedirs
from dvc.utils.fs import makedirs


def test_get_file(repo_dir):
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dvc.exceptions import NoOutputInExternalRepoError
from dvc.stage import Stage
from dvc.system import System
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath
from tests.utils import trees_equal

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_import_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import dvc
from dvc.main import main
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from tests.basic_env import TestDvc
from tests.utils import spy

Expand Down
34 changes: 0 additions & 34 deletions tests/func/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,8 @@
# encoding: utf-8
import os
import stat

import pytest

from dvc import utils


def test_copyfile(tmp_dir):
    """copyfile accepts a file path or an existing directory as destination."""
    contents = "file1contents"
    source_name = "file1"
    target_file = "file2"
    target_dir = "testdir"

    tmp_dir.gen(source_name, contents)
    os.mkdir(target_dir)

    # Destination given as a plain file path.
    utils.copyfile(source_name, target_file)
    assert (tmp_dir / target_file).read_text() == contents

    # Destination given as a directory: the copy keeps the source's name.
    utils.copyfile(source_name, target_dir)
    assert (tmp_dir / target_dir / source_name).read_text() == contents


def test_file_md5_crlf(tmp_dir):
tmp_dir.gen("cr", b"a\nb\nc")
tmp_dir.gen("crlf", b"a\r\nb\r\nc")
Expand Down Expand Up @@ -62,17 +42,3 @@ def test_boxify():
)

assert expected == utils.boxify("message")


@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.")
def test_makedirs_permissions(tmp_dir):
    """Leaf and intermediate directories both end up with the given mode."""
    expected_mode = 0o755
    parent = "тестовая-директория"
    leaf = os.path.join(parent, "data")

    # Precondition: the parent has to be created by makedirs itself.
    assert not os.path.exists(parent)

    utils.makedirs(leaf, mode=expected_mode)

    for created in (leaf, parent):
        assert stat.S_IMODE(os.stat(created).st_mode) == expected_mode
2 changes: 1 addition & 1 deletion tests/unit/remote/test_remote_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest
import os
from dvc.remote.s3 import RemoteS3
from dvc.utils import walk_files
from dvc.utils.fs import walk_files
from dvc.path_info import PathInfo
from tests.remotes import GCP, S3Mocked

Expand Down
47 changes: 47 additions & 0 deletions tests/unit/utils/test_fs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import filecmp
import os
from unittest import TestCase

Expand All @@ -11,11 +12,14 @@
from dvc.system import System
from dvc.utils import relpath
from dvc.utils.fs import BasePathNotInCheckedPathException
from dvc.utils.fs import copyfile
from dvc.utils.fs import contains_symlink_up_to
from dvc.utils.fs import get_inode
from dvc.utils.fs import get_mtime_and_size
from dvc.utils.fs import move
from dvc.utils.fs import path_isin, remove
from dvc.utils.fs import makedirs
from dvc.utils.fs import walk_files
from tests.basic_env import TestDir
from tests.utils import spy

Expand Down Expand Up @@ -202,3 +206,46 @@ def test_path_isin_with_absolute_path():
child = os.path.join(parent, "to", "folder")

assert path_isin(child, parent)


def test_makedirs(repo_dir):
    """makedirs accepts both a plain string path and a PathInfo."""
    root = repo_dir.root_dir
    str_path = os.path.join(root, "directory")
    info_path = PathInfo(os.path.join(root, "another", "directory"))

    # Plain string path.
    makedirs(str_path)
    assert os.path.isdir(str_path)

    # PathInfo with a missing intermediate directory.
    makedirs(info_path)
    assert os.path.isdir(info_path.fspath)


@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR])
def test_copyfile(path, repo_dir):
    """copyfile works with str and PathInfo arguments for each ``path``."""

    def expected_target(source, target):
        # A directory destination receives the file under the source's
        # basename; otherwise the destination is the file itself.
        if os.path.isdir(target):
            return os.path.join(target, os.path.basename(source))
        return target

    src = repo_dir.FOO
    dest = path
    src_info = PathInfo(repo_dir.BAR)
    dest_info = PathInfo(path)

    # String arguments.
    copyfile(src, dest)
    assert filecmp.cmp(src, expected_target(src, dest), shallow=False)

    # PathInfo arguments.
    copyfile(src_info, dest_info)
    assert filecmp.cmp(
        src_info.fspath,
        expected_target(src_info.fspath, dest_info.fspath),
        shallow=False,
    )


def test_walk_files(tmp_dir):
    """walk_files yields the same paths for "." and for the tmp_dir value."""
    from_str = list(walk_files("."))
    from_fixture = list(walk_files(tmp_dir))
    assert from_str == from_fixture
Loading

0 comments on commit 095464d

Please sign in to comment.