Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move fs utilities to fs.py from __init__.py #3093

Merged
merged 3 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dvc/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from dvc.lock import Lock, LockError
from dvc.repo import Repo
from dvc.scm import SCM
from dvc.utils import env2bool, is_binary, makedirs
from dvc.utils import env2bool, is_binary
from dvc.utils.fs import makedirs


logger = logging.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from dvc.progress import Tqdm
from dvc.remote.slow_link_detection import slow_link_guard
from dvc.state import StateNoop
from dvc.utils import makedirs, relpath, tmp_fname
from dvc.utils.fs import move
from dvc.utils import relpath, tmp_fname
from dvc.utils.fs import move, makedirs
from dvc.utils.http import open_url

logger = logging.getLogger(__name__)
Expand Down
8 changes: 3 additions & 5 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@
from dvc.scheme import Schemes
from dvc.scm.tree import is_working_tree
from dvc.system import System
from dvc.utils import copyfile
from dvc.utils.fs import copyfile
from dvc.utils import file_md5
from dvc.utils import makedirs
from dvc.utils import relpath
from dvc.utils import tmp_fname
from dvc.utils import walk_files
from dvc.compat import fspath_py35
from dvc.utils.fs import move
from dvc.utils.fs import remove
from dvc.utils.fs import move, makedirs, remove
from dvc.utils.fs import walk_files

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, root_dir=None):
from dvc.repo.metrics import Metrics
from dvc.scm.tree import WorkingTree
from dvc.repo.tag import Tag
from dvc.utils import makedirs
from dvc.utils.fs import makedirs

root_dir = self.find_root(root_dir)

Expand Down
53 changes: 0 additions & 53 deletions dvc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,53 +105,6 @@ def dict_md5(d, exclude=()):
return bytes_md5(byts)


def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, preferring a reflink over a byte copy.

    ``System.reflink`` is attempted first. If it raises ``DvcException``
    the file is copied in ``LOCAL_CHUNK_SIZE`` pieces while a ``Tqdm``
    progress bar is updated.

    Args:
        src: path of the file to copy (converted with ``fspath_py35``).
        dest: target path (converted with ``fspath_py35``); when it is an
            existing directory the file is placed inside it under the
            base name of ``src``.
        no_progress_bar: passed to ``Tqdm`` as ``disable``.
        name: progress bar description; defaults to the base name of
            ``dest`` as given, i.e. before any directory resolution.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    # Copying "into" a directory keeps the source file name.
    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        progress = Tqdm(
            desc=name, disable=no_progress_bar, total=total, bytes=True
        )
        with progress as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # An empty read marks end of file and ends the iteration.
                for chunk in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(chunk)
                    pbar.update(len(chunk))


def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` with its missing parents, optionally with ``mode``.

    Args:
        path: directory to create (converted with ``fspath_py35``).
        exist_ok: forwarded unchanged to ``os.makedirs``.
        mode: permission bits wanted on the created directories; ``None``
            means a plain ``os.makedirs`` call with no umask handling.
    """
    target = fspath_py35(path)

    if mode is not None:
        # Since Python 3.7 the `mode` argument of `os.makedirs` no longer
        # affects newly-created intermediate-level directories, so the
        # permissions are enforced through a temporary umask instead.
        previous_umask = os.umask(0o777 - mode)
        try:
            os.makedirs(target, exist_ok=exist_ok)
        finally:
            # Always restore the umask that was active before the call.
            os.umask(previous_umask)
    else:
        os.makedirs(target, exist_ok=exist_ok)


def _split(list_to_split, chunk_size):
return [
list_to_split[i : i + chunk_size]
Expand Down Expand Up @@ -278,12 +231,6 @@ def to_yaml_string(data):
return stream.getvalue()


def walk_files(directory):
    """Lazily yield the path of every file found below ``directory``.

    Each yielded path is the walked directory joined with the file name,
    so it carries the same prefix that ``directory`` was given with.
    """
    for dirpath, _dirnames, filenames in os.walk(fspath(directory)):
        yield from (os.path.join(dirpath, fname) for fname in filenames)


def colorize(message, color=None):
"""Returns a message in a specified color."""
if not color:
Expand Down
55 changes: 55 additions & 0 deletions dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

logger = logging.getLogger(__name__)

LOCAL_CHUNK_SIZE = 2 ** 20 # 1 MB


def fs_copy(src, dst):
if os.path.isdir(src):
Expand Down Expand Up @@ -152,3 +154,56 @@ def normalize_path(path):
parent = os.path.join(normalize_path(parent), "")
child = normalize_path(child)
return child != parent and child.startswith(parent)


def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally forcing ``mode``.

    Args:
        path: directory to create; converted with ``fspath_py35`` first.
        exist_ok: forwarded unchanged to ``os.makedirs``.
        mode: permission bits wanted on every newly created directory,
            intermediate ones included. ``None`` leaves permissions to
            ``os.makedirs`` and the current umask.

    NOTE: the umask is a process-wide setting, so files created elsewhere
    in the process while this call is in progress may be affected.
    """
    path = fspath_py35(path)

    if mode is None:
        os.makedirs(path, exist_ok=exist_ok)
        return

    # utilize umask to set proper permissions since Python 3.7 the `mode`
    # `makedirs` argument no longer affects the file permission bits of
    # newly-created intermediate-level directories.
    #
    # The mask is built with bit operations rather than `0o777 - mode`:
    # both agree for any mode within 0o777, but subtraction yields a
    # negative number as soon as `mode` carries higher bits (setuid,
    # setgid, sticky), which is not a meaningful umask value.
    umask = os.umask(0o777 & ~mode)
    try:
        os.makedirs(path, exist_ok=exist_ok)
    finally:
        # Restore the caller's umask even if directory creation failed.
        os.umask(umask)


def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy a file, trying a reflink first and streaming bytes otherwise.

    Args:
        src: source file path (converted with ``fspath_py35``).
        dest: destination path (converted with ``fspath_py35``). If it is
            an existing directory, the copy lands inside it and keeps the
            base name of ``src``.
        no_progress_bar: handed to ``Tqdm`` as its ``disable`` flag.
        name: label for the progress bar; falls back to the base name of
            ``dest`` exactly as it was passed in.

    The streamed fallback only runs when ``System.reflink`` raises
    ``DvcException``; it reads ``LOCAL_CHUNK_SIZE`` bytes at a time.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    name = name or os.path.basename(dest)
    total = os.stat(src).st_size

    dest_is_dir = os.path.isdir(dest)
    if dest_is_dir:
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        with Tqdm(
            desc=name, disable=no_progress_bar, total=total, bytes=True
        ) as pbar, open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
            read_chunk = lambda: fsrc.read(LOCAL_CHUNK_SIZE)  # noqa: E731
            # iter() stops at the first empty read, i.e. at end of file.
            for buf in iter(read_chunk, b""):
                fdest.write(buf)
                pbar.update(len(buf))


def walk_files(directory):
    """Generate the paths of all files under ``directory``, recursively.

    ``directory`` is converted with ``fspath`` before being walked, and
    every result is the containing directory joined with the file name.
    """
    walker = os.walk(fspath(directory))
    for parent, _subdirs, names in walker:
        for name in names:
            yield os.path.join(parent, name)
2 changes: 1 addition & 1 deletion tests/dir_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import pytest
from funcy.py3 import lmap, retry

from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath, fspath_py35


Expand Down
3 changes: 2 additions & 1 deletion tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from dvc.stage import StageFileBadNameError
from dvc.stage import StageFileDoesNotExistError
from dvc.system import System
from dvc.utils import relpath, walk_files
from dvc.utils import relpath
from dvc.utils.fs import walk_files
from dvc.utils.stage import dump_stage_file
from dvc.utils.stage import load_stage_file
from tests.basic_env import TestDvc
Expand Down
36 changes: 36 additions & 0 deletions tests/func/test_fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import stat

import pytest

from dvc.utils.fs import makedirs, copyfile


@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.")
def test_makedirs_permissions(tmp_dir):
    """`makedirs(mode=...)` applies the mode to the leaf and its new parent.

    Uses relative paths, so it presumably relies on the ``tmp_dir`` fixture
    making a fresh temporary directory the working directory (confirm
    against the fixture definition).
    """
    expected_mode = 0o755
    parent = "тСстовая-дирСктория"
    leaf = os.path.join(parent, "data")

    # The parent must be created by the call under test, not beforehand.
    assert not os.path.exists(parent)

    makedirs(leaf, mode=expected_mode)

    for created in (leaf, parent):
        assert stat.S_IMODE(os.stat(created).st_mode) == expected_mode


def test_copyfile(tmp_dir):
    """`copyfile` works for a file destination and a directory destination."""
    src, dest, dest_dir = "file1", "file2", "testdir"
    contents = "file1contents"

    tmp_dir.gen(src, contents)
    os.mkdir(dest_dir)

    # Destination is a new file path.
    copyfile(src, dest)
    assert (tmp_dir / dest).read_text() == contents

    # Destination is an existing directory: the source name is kept.
    copyfile(src, dest_dir)
    assert (tmp_dir / dest_dir / src).read_text() == contents
2 changes: 1 addition & 1 deletion tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath
from tests.utils import trees_equal

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_get_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from dvc.repo import Repo
from dvc.utils import makedirs
from dvc.utils.fs import makedirs


def test_get_file(repo_dir):
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dvc.exceptions import NoOutputInExternalRepoError
from dvc.stage import Stage
from dvc.system import System
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from dvc.compat import fspath
from tests.utils import trees_equal

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_import_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import dvc
from dvc.main import main
from dvc.utils import makedirs
from dvc.utils.fs import makedirs
from tests.basic_env import TestDvc
from tests.utils import spy

Expand Down
34 changes: 0 additions & 34 deletions tests/func/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,8 @@
# encoding: utf-8
import os
import stat

import pytest

from dvc import utils


def test_copyfile(tmp_dir):
    """`utils.copyfile` copies to a file path and into a directory."""
    contents = "file1contents"
    src = "file1"
    dest = "file2"
    dest_dir = "testdir"

    tmp_dir.gen(src, contents)
    os.mkdir(dest_dir)

    utils.copyfile(src, dest)
    copied_to_file = tmp_dir / dest
    assert copied_to_file.read_text() == contents

    # An existing directory as destination keeps the source file name.
    utils.copyfile(src, dest_dir)
    copied_into_dir = tmp_dir / dest_dir / src
    assert copied_into_dir.read_text() == contents


def test_file_md5_crlf(tmp_dir):
tmp_dir.gen("cr", b"a\nb\nc")
tmp_dir.gen("crlf", b"a\r\nb\r\nc")
Expand Down Expand Up @@ -62,17 +42,3 @@ def test_boxify():
)

assert expected == utils.boxify("message")


@pytest.mark.skipif(os.name == "nt", reason="Not supported for Windows.")
def test_makedirs_permissions(tmp_dir):
    """`utils.makedirs` gives both the leaf and its new parent `mode`."""

    def permission_bits(path):
        # Strip the file-type bits, leaving only the permission bits.
        return stat.S_IMODE(os.stat(path).st_mode)

    dir_mode = 0o755
    intermediate_dir = "тСстовая-дирСктория"
    test_dir = os.path.join(intermediate_dir, "data")

    assert not os.path.exists(intermediate_dir)

    utils.makedirs(test_dir, mode=dir_mode)

    assert permission_bits(test_dir) == dir_mode
    assert permission_bits(intermediate_dir) == dir_mode
2 changes: 1 addition & 1 deletion tests/unit/remote/test_remote_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest
import os
from dvc.remote.s3 import RemoteS3
from dvc.utils import walk_files
from dvc.utils.fs import walk_files
from dvc.path_info import PathInfo
from tests.remotes import GCP, S3Mocked

Expand Down
47 changes: 47 additions & 0 deletions tests/unit/utils/test_fs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import filecmp
import os
from unittest import TestCase

Expand All @@ -11,11 +12,14 @@
from dvc.system import System
from dvc.utils import relpath
from dvc.utils.fs import BasePathNotInCheckedPathException
from dvc.utils.fs import copyfile
from dvc.utils.fs import contains_symlink_up_to
from dvc.utils.fs import get_inode
from dvc.utils.fs import get_mtime_and_size
from dvc.utils.fs import move
from dvc.utils.fs import path_isin, remove
from dvc.utils.fs import makedirs
from dvc.utils.fs import walk_files
from tests.basic_env import TestDir
from tests.utils import spy

Expand Down Expand Up @@ -202,3 +206,46 @@ def test_path_isin_with_absolute_path():
child = os.path.join(parent, "to", "folder")

assert path_isin(child, parent)


def test_makedirs(repo_dir):
    """`makedirs` accepts both a plain string path and a `PathInfo`."""
    plain_path = os.path.join(repo_dir.root_dir, "directory")
    nested_path = os.path.join(repo_dir.root_dir, "another", "directory")
    path_info = PathInfo(nested_path)

    makedirs(plain_path)
    assert os.path.isdir(plain_path)

    # The PathInfo target also needs its missing parent to be created.
    makedirs(path_info)
    assert os.path.isdir(path_info.fspath)


@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR])
def test_copyfile(path, repo_dir):
    """`copyfile` handles string paths and `PathInfo` objects alike.

    Runs once per parametrized destination; whichever destination is a
    directory is checked through the file created inside it.
    """

    def assert_same_content(source, target):
        # A directory destination receives the file under its source name.
        if os.path.isdir(target):
            target = os.path.join(target, os.path.basename(source))
        assert filecmp.cmp(source, target, shallow=False)

    src = repo_dir.FOO
    dest = path
    src_info = PathInfo(repo_dir.BAR)
    dest_info = PathInfo(path)

    copyfile(src, dest)
    assert_same_content(src, dest)

    copyfile(src_info, dest_info)
    assert_same_content(src_info.fspath, dest_info.fspath)


def test_walk_files(tmp_dir):
    """`walk_files` gives the same result for "." and for `tmp_dir`."""
    # NOTE(review): walk_files joins each file name onto the directory it
    # was given, so "." and `tmp_dir` look like they would yield different
    # prefixes if any file existed -- confirm this is not only passing
    # because the directory is empty.
    from_string = list(walk_files("."))
    from_path_like = list(walk_files(tmp_dir))
    assert from_string == from_path_like
Loading