Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add: output improvements #2546

Merged
merged 22 commits into from
Oct 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 20 additions & 18 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dvc.exceptions import DvcException
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import RecursiveAddingWhileUsingFilename


logger = logging.getLogger(__name__)
Expand All @@ -14,49 +15,50 @@ class CmdAdd(CmdBase):
def run(self):
try:
if len(self.args.targets) > 1 and self.args.file:
raise DvcException("can't use '--file' with multiple targets")
raise RecursiveAddingWhileUsingFilename()

for target in self.args.targets:
self.repo.add(
target,
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
)
self.repo.add(
self.args.targets,
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
)

except DvcException:
logger.exception("failed to add file")
logger.exception("")
return 1
return 0


def add_parser(subparsers, parent_parser):
ADD_HELP = "Take data files or directories under DVC control."
ADD_HELP = "Track data files or directories with DVC."

add_parser = subparsers.add_parser(
parser = subparsers.add_parser(
"add",
parents=[parent_parser],
description=append_doc_link(ADD_HELP, "add"),
help=ADD_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
add_parser.add_argument(
parser.add_argument(
"-R",
"--recursive",
action="store_true",
default=False,
help="Recursively add each file under the directory.",
help="Recursively add files under directory targets.",
)
add_parser.add_argument(
parser.add_argument(
"--no-commit",
action="store_true",
default=False,
help="Don't put files/directories into cache.",
)
add_parser.add_argument(
"-f", "--file", help="Specify name of the DVC-file it generates."
parser.add_argument(
"-f",
"--file",
help="Specify name of the DVC-file this command will generate.",
)
add_parser.add_argument(
parser.add_argument(
"targets", nargs="+", help="Input files/directories to add."
)
add_parser.set_defaults(func=CmdAdd)
parser.set_defaults(func=CmdAdd)
4 changes: 2 additions & 2 deletions dvc/command/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def append_doc_link(help_message, path):
if not path:
return help_message
doc_base = "https://man.dvc.org/"
return "{message}\ndocumentation: {blue}{base}{path}{nc}".format(
return "{message}\nDocumentation: <{blue}{base}{path}{nc}>".format(
message=help_message,
base=doc_base,
path=path,
Expand Down Expand Up @@ -57,7 +57,7 @@ def default_targets(self):

# Abstract methods that have to be implemented by any inheritance class
def run(self):
pass
raise NotImplementedError


class CmdBaseNoRepo(CmdBase):
Expand Down
2 changes: 1 addition & 1 deletion dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def __init__(self, path, cause=None):
class RecursiveAddingWhileUsingFilename(DvcException):
def __init__(self):
super(RecursiveAddingWhileUsingFilename, self).__init__(
"using fname with recursive is not allowed."
"cannot use `fname` with multiple targets or `-R|--recursive`"
jorgeorpinel marked this conversation as resolved.
Show resolved Hide resolved
)


Expand Down
39 changes: 33 additions & 6 deletions dvc/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ def filter(self, record):
return record.levelno < logging.WARNING


class ExcludeInfoFilter(logging.Filter):
def filter(self, record):
return record.levelno < logging.INFO


class ColorFormatter(logging.Formatter):
"""Enable color support when logging to a terminal that supports it.
Expand Down Expand Up @@ -170,16 +175,26 @@ def setup(level=logging.INFO):
logging.config.dictConfig(
{
"version": 1,
"filters": {"exclude_errors": {"()": ExcludeErrorsFilter}},
"filters": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain what is going on here in general? The commit message doesn't have any explanation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was needed as previously only logging.DEBUG and logging.WARNING were handled by dvc.logger.LoggerHandler. However this change ensures that logging.INFO is also handled by our class. The custom class is required to ensure that logger.info() interleaves properly with dvc.progress.Tqdm().

I should've described this commit as "progress-aware logger.info"

"exclude_errors": {"()": ExcludeErrorsFilter},
"exclude_info": {"()": ExcludeInfoFilter},
},
"formatters": {"color": {"()": ColorFormatter}},
"handlers": {
"console": {
"console_info": {
"class": "dvc.logger.LoggerHandler",
"level": "DEBUG",
"level": "INFO",
"formatter": "color",
"stream": "ext://sys.stdout",
"filters": ["exclude_errors"],
},
"console_debug": {
"class": "dvc.logger.LoggerHandler",
"level": "DEBUG",
"formatter": "color",
"stream": "ext://sys.stdout",
"filters": ["exclude_info"],
},
"console_errors": {
"class": "dvc.logger.LoggerHandler",
"level": "WARNING",
Expand All @@ -190,15 +205,27 @@ def setup(level=logging.INFO):
"loggers": {
"dvc": {
"level": level,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
"paramiko": {
"level": logging.CRITICAL,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
"flufl.lock": {
"level": logging.CRITICAL,
"handlers": ["console", "console_errors"],
"handlers": [
"console_info",
"console_debug",
"console_errors",
],
},
},
}
Expand Down
19 changes: 8 additions & 11 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
DvcIgnoreInCollectedDirError,
)
from dvc.progress import Tqdm
from dvc.utils import LARGE_DIR_SIZE, tmp_fname, move, relpath, makedirs
from dvc.utils import tmp_fname, move, relpath, makedirs
from dvc.state import StateNoop
from dvc.path_info import PathInfo, URLInfo
from dvc.utils.http import open_url
Expand Down Expand Up @@ -177,16 +177,13 @@ def _calculate_checksums(self, file_infos):
file_infos = list(file_infos)
with ThreadPoolExecutor(max_workers=self.checksum_jobs) as executor:
tasks = executor.map(self.get_file_checksum, file_infos)

if len(file_infos) > LARGE_DIR_SIZE:
logger.info(
(
"Computing md5 for a large number of files. "
"This is only done once."
)
)
tasks = Tqdm(tasks, total=len(file_infos), unit="md5")
checksums = dict(zip(file_infos, tasks))
with Tqdm(
tasks,
total=len(file_infos),
unit="md5",
desc="Computing hashes (only done once)",
) as tasks:
checksums = dict(zip(file_infos, tasks))
return checksums

def _collect_dir(self, path_info):
Expand Down
65 changes: 40 additions & 25 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,63 @@
import os
import logging
import colorama
from six import string_types

from dvc.repo.scm_context import scm_context
from dvc.stage import Stage
from dvc.utils import walk_files, LARGE_DIR_SIZE
from dvc.exceptions import RecursiveAddingWhileUsingFilename

from dvc.progress import Tqdm
from . import locked


logger = logging.getLogger(__name__)


@locked
@scm_context
def add(repo, target, recursive=False, no_commit=False, fname=None):
def add(repo, targets, recursive=False, no_commit=False, fname=None):
if recursive and fname:
raise RecursiveAddingWhileUsingFilename()

targets = _find_all_targets(repo, target, recursive)
if isinstance(targets, string_types):
targets = [targets]

if os.path.isdir(target) and len(targets) > LARGE_DIR_SIZE:
logger.warning(
"You are adding a large directory '{target}' recursively,"
" consider tracking it as a whole instead.\n"
"{purple}HINT:{nc} Remove the generated DVC-file and then"
" run {cyan}dvc add {target}{nc}".format(
purple=colorama.Fore.MAGENTA,
cyan=colorama.Fore.CYAN,
nc=colorama.Style.RESET_ALL,
target=target,
)
)
stages_list = []
with Tqdm(total=len(targets), desc="Add", unit="file", leave=True) as pbar:
for target in targets:
sub_targets = _find_all_targets(repo, target, recursive)
pbar.total += len(sub_targets) - 1

stages = _create_stages(repo, targets, fname)
if os.path.isdir(target) and len(sub_targets) > LARGE_DIR_SIZE:
logger.warning(
"You are adding a large directory '{target}' recursively,"
" consider tracking it as a whole instead.\n"
"{purple}HINT:{nc} Remove the generated DVC-file and then"
" run {cyan}dvc add {target}{nc}".format(
purple=colorama.Fore.MAGENTA,
cyan=colorama.Fore.CYAN,
nc=colorama.Style.RESET_ALL,
target=target,
)
)

repo.check_modified_graph(stages)
stages = _create_stages(repo, sub_targets, fname, pbar=pbar)

for stage in stages:
stage.save()
repo.check_modified_graph(stages)

if not no_commit:
stage.commit()
for stage in stages:
stage.save()

stage.dump()
if not no_commit:
stage.commit()

return stages
stage.dump()

stages_list += stages
# remove filled bar bit of progress, leaving stats
pbar.bar_format = pbar.BAR_FMT_DEFAULT.replace("|{bar:10}|", " ")
casperdcl marked this conversation as resolved.
Show resolved Hide resolved

return stages_list


def _find_all_targets(repo, target, recursive):
Expand All @@ -64,15 +75,19 @@ def _find_all_targets(repo, target, recursive):
return [target]


def _create_stages(repo, targets, fname):
def _create_stages(repo, targets, fname, pbar=None):
stages = []

for out in targets:
stage = Stage.create(repo, outs=[out], add=True, fname=fname)

if not stage:
if pbar is not None:
pbar.total -= 1
continue

stages.append(stage)
if pbar is not None:
pbar.update_desc(out)

return stages
2 changes: 1 addition & 1 deletion dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def dump(self):

self._check_dvc_filename(fname)

logger.info(
logger.debug(
"Saving information to '{file}'.".format(file=relpath(fname))
Comment on lines -666 to 667
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And one last question: Is this really a debug output? I also find that message useful in the output.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the thing is the tracked file name will be printed in the progress bar itself, while the dvc file names will all be printed at the end. This means this message should no longer be required. I left it in for 1. debugging and 2. not sure if the 12 other uses of stage.dump() actually need it to be logger.info. This is the only real issue I have with this PR atm.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Feel fee to ask @efiop and/or resolve this conversation when you see fit. Thanks.

)
d = self.dumpd()
Expand Down
7 changes: 4 additions & 3 deletions tests/unit/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ def test_progress_awareness(self, mocker, capsys, caplog):


def test_handlers():
stdout, stderr = logger.handlers
out, deb, err = logger.handlers

assert stdout.level == logging.DEBUG
assert stderr.level == logging.WARNING
assert out.level == logging.INFO
assert deb.level == logging.DEBUG
assert err.level == logging.WARNING