Skip to content

Commit

Permalink
add: output improvements (#2546)
Browse files Browse the repository at this point in the history
* some doc improvements

* add: intermediate progress

* update after rebase

* linting

* fix checksum progress cleanup by ensuring gc

* progress-aware logger.info

* re-fix checksum pbar closure

* multi-file add

* make stage.dump() less verbose.

TODO: check all instances of stage.dump and possibly make verbose

* decrease verbosity of checksum calculations

* leave dvc.add stats

* update/fix logger tests

* minor doc reword

Co-Authored-By: Ruslan Kuprieiev <[email protected]>

* tidy API target_;list => targets

- as per #2546 (comment)

* tidy RecursiveAddingWhileUsingFilename message duplication

- fixes #2546 (comment)

* minor logging uncomment

- fixes #2546 (comment)

* minor doc improvements

- #2546 (comment)
- #2546 (comment)

* tidy exception

- fixes #2546 (comment)

* minor doc update

- fixes https://github.com/iterative/dvc/pull/2546/files#r337294491
- related to iterative/dvc.org#719

* Adding => add

- fixes #2546 (comment)

* various review fixes

- minor comment as per
  #2546 (comment)
- revert warning style fix
  #2546 (comment)
- Abstract error consistency
  #2546 (comment)
- minor comment as per
  #2546 (comment)

* remove diabling progress option

- fixes #2546 (comment)
  • Loading branch information
casperdcl authored and efiop committed Oct 24, 2019
1 parent 3667830 commit 9ce3b0c
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 67 deletions.
38 changes: 20 additions & 18 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dvc.exceptions import DvcException
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import RecursiveAddingWhileUsingFilename


logger = logging.getLogger(__name__)
Expand All @@ -14,49 +15,50 @@ class CmdAdd(CmdBase):
def run(self):
try:
if len(self.args.targets) > 1 and self.args.file:
raise DvcException("can't use '--file' with multiple targets")
raise RecursiveAddingWhileUsingFilename()

for target in self.args.targets:
self.repo.add(
target,
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
)
self.repo.add(
self.args.targets,
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
)

except DvcException:
logger.exception("failed to add file")
logger.exception("")
return 1
return 0


def add_parser(subparsers, parent_parser):
ADD_HELP = "Take data files or directories under DVC control."
ADD_HELP = "Track data files or directories with DVC."

add_parser = subparsers.add_parser(
parser = subparsers.add_parser(
"add",
parents=[parent_parser],
description=append_doc_link(ADD_HELP, "add"),
help=ADD_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
add_parser.add_argument(
parser.add_argument(
"-R",
"--recursive",
action="store_true",
default=False,
help="Recursively add each file under the directory.",
help="Recursively add files under directory targets.",
)
add_parser.add_argument(
parser.add_argument(
"--no-commit",
action="store_true",
default=False,
help="Don't put files/directories into cache.",
)
add_parser.add_argument(
"-f", "--file", help="Specify name of the DVC-file it generates."
parser.add_argument(
"-f",
"--file",
help="Specify name of the DVC-file this command will generate.",
)
add_parser.add_argument(
parser.add_argument(
"targets", nargs="+", help="Input files/directories to add."
)
add_parser.set_defaults(func=CmdAdd)
parser.set_defaults(func=CmdAdd)
4 changes: 2 additions & 2 deletions dvc/command/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def append_doc_link(help_message, path):
if not path:
return help_message
doc_base = "https://man.dvc.org/"
return "{message}\ndocumentation: {blue}{base}{path}{nc}".format(
return "{message}\nDocumentation: <{blue}{base}{path}{nc}>".format(
message=help_message,
base=doc_base,
path=path,
Expand Down Expand Up @@ -57,7 +57,7 @@ def default_targets(self):

# Abstract methods that have to be implemented by any inheritance class
def run(self):
pass
raise NotImplementedError


class CmdBaseNoRepo(CmdBase):
Expand Down
2 changes: 1 addition & 1 deletion dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def __init__(self, path, cause=None):
class RecursiveAddingWhileUsingFilename(DvcException):
def __init__(self):
super(RecursiveAddingWhileUsingFilename, self).__init__(
"using fname with recursive is not allowed."
"cannot use `fname` with multiple targets or `-R|--recursive`"
)


Expand Down
39 changes: 33 additions & 6 deletions dvc/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ def filter(self, record):
return record.levelno < logging.WARNING


class ExcludeInfoFilter(logging.Filter):
def filter(self, record):
return record.levelno < logging.INFO


class ColorFormatter(logging.Formatter):
"""Enable color support when logging to a terminal that supports it.
Expand Down Expand Up @@ -170,16 +175,26 @@ def setup(level=logging.INFO):
logging.config.dictConfig(
{
"version": 1,
"filters": {"exclude_errors": {"()": ExcludeErrorsFilter}},
"filters": {
"exclude_errors": {"()": ExcludeErrorsFilter},
"exclude_info": {"()": ExcludeInfoFilter},
},
"formatters": {"color": {"()": ColorFormatter}},
"handlers": {
"console": {
"console_info": {
"class": "dvc.logger.LoggerHandler",
"level": "DEBUG",
"level": "INFO",
"formatter": "color",
"stream": "ext://sys.stdout",
"filters": ["exclude_errors"],
},
"console_debug": {
"class": "dvc.logger.LoggerHandler",
"level": "DEBUG",
"formatter": "color",
"stream": "ext://sys.stdout",
"filters": ["exclude_info"],
},
"console_errors": {
"class": "dvc.logger.LoggerHandler",
"level": "WARNING",
Expand All @@ -190,15 +205,27 @@ def setup(level=logging.INFO):
"loggers": {
"dvc": {
"level": level,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
"paramiko": {
"level": logging.CRITICAL,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
"flufl.lock": {
"level": logging.CRITICAL,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
},
}
Expand Down
19 changes: 8 additions & 11 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
DvcIgnoreInCollectedDirError,
)
from dvc.progress import Tqdm
from dvc.utils import LARGE_DIR_SIZE, tmp_fname, move, relpath, makedirs
from dvc.utils import tmp_fname, move, relpath, makedirs
from dvc.state import StateNoop
from dvc.path_info import PathInfo, URLInfo
from dvc.utils.http import open_url
Expand Down Expand Up @@ -177,16 +177,13 @@ def _calculate_checksums(self, file_infos):
file_infos = list(file_infos)
with ThreadPoolExecutor(max_workers=self.checksum_jobs) as executor:
tasks = executor.map(self.get_file_checksum, file_infos)

if len(file_infos) > LARGE_DIR_SIZE:
logger.info(
(
"Computing md5 for a large number of files. "
"This is only done once."
)
)
tasks = Tqdm(tasks, total=len(file_infos), unit="md5")
checksums = dict(zip(file_infos, tasks))
with Tqdm(
tasks,
total=len(file_infos),
unit="md5",
desc="Computing hashes (only done once)",
) as tasks:
checksums = dict(zip(file_infos, tasks))
return checksums

def _collect_dir(self, path_info):
Expand Down
65 changes: 40 additions & 25 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,63 @@
import os
import logging
import colorama
from six import string_types

from dvc.repo.scm_context import scm_context
from dvc.stage import Stage
from dvc.utils import walk_files, LARGE_DIR_SIZE
from dvc.exceptions import RecursiveAddingWhileUsingFilename

from dvc.progress import Tqdm
from . import locked


logger = logging.getLogger(__name__)


@locked
@scm_context
def add(repo, target, recursive=False, no_commit=False, fname=None):
def add(repo, targets, recursive=False, no_commit=False, fname=None):
if recursive and fname:
raise RecursiveAddingWhileUsingFilename()

targets = _find_all_targets(repo, target, recursive)
if isinstance(targets, string_types):
targets = [targets]

if os.path.isdir(target) and len(targets) > LARGE_DIR_SIZE:
logger.warning(
"You are adding a large directory '{target}' recursively,"
" consider tracking it as a whole instead.\n"
"{purple}HINT:{nc} Remove the generated DVC-file and then"
" run {cyan}dvc add {target}{nc}".format(
purple=colorama.Fore.MAGENTA,
cyan=colorama.Fore.CYAN,
nc=colorama.Style.RESET_ALL,
target=target,
)
)
stages_list = []
with Tqdm(total=len(targets), desc="Add", unit="file", leave=True) as pbar:
for target in targets:
sub_targets = _find_all_targets(repo, target, recursive)
pbar.total += len(sub_targets) - 1

stages = _create_stages(repo, targets, fname)
if os.path.isdir(target) and len(sub_targets) > LARGE_DIR_SIZE:
logger.warning(
"You are adding a large directory '{target}' recursively,"
" consider tracking it as a whole instead.\n"
"{purple}HINT:{nc} Remove the generated DVC-file and then"
" run {cyan}dvc add {target}{nc}".format(
purple=colorama.Fore.MAGENTA,
cyan=colorama.Fore.CYAN,
nc=colorama.Style.RESET_ALL,
target=target,
)
)

repo.check_modified_graph(stages)
stages = _create_stages(repo, sub_targets, fname, pbar=pbar)

for stage in stages:
stage.save()
repo.check_modified_graph(stages)

if not no_commit:
stage.commit()
for stage in stages:
stage.save()

stage.dump()
if not no_commit:
stage.commit()

return stages
stage.dump()

stages_list += stages
# remove filled bar bit of progress, leaving stats
pbar.bar_format = pbar.BAR_FMT_DEFAULT.replace("|{bar:10}|", " ")

return stages_list


def _find_all_targets(repo, target, recursive):
Expand All @@ -64,15 +75,19 @@ def _find_all_targets(repo, target, recursive):
return [target]


def _create_stages(repo, targets, fname):
def _create_stages(repo, targets, fname, pbar=None):
stages = []

for out in targets:
stage = Stage.create(repo, outs=[out], add=True, fname=fname)

if not stage:
if pbar is not None:
pbar.total -= 1
continue

stages.append(stage)
if pbar is not None:
pbar.update_desc(out)

return stages
2 changes: 1 addition & 1 deletion dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def dump(self):

self._check_dvc_filename(fname)

logger.info(
logger.debug(
"Saving information to '{file}'.".format(file=relpath(fname))
)
d = self.dumpd()
Expand Down
7 changes: 4 additions & 3 deletions tests/unit/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ def test_progress_awareness(self, mocker, capsys, caplog):


def test_handlers():
stdout, stderr = logger.handlers
out, deb, err = logger.handlers

assert stdout.level == logging.DEBUG
assert stderr.level == logging.WARNING
assert out.level == logging.INFO
assert deb.level == logging.DEBUG
assert err.level == logging.WARNING

0 comments on commit 9ce3b0c

Please sign in to comment.