Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restyle diff: reimplement interface and tests from scratch #3258

Closed
wants to merge 35 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
198bf55
diff: reimplement interface and tests from scratch
Jan 22, 2020
ebd5a5a
diff: address comments from review
Jan 24, 2020
b2de740
tests: add result examples
Jan 24, 2020
f791371
diff: change structure to be indexed by path
Jan 24, 2020
bc8871e
diff: adjust some wording
Jan 24, 2020
576a276
diff: fix some tests and adjust result for modify
Jan 25, 2020
07abd1b
diff: add support for specifying a target
Jan 25, 2020
5ace747
diff: group by change state and support dirs
Jan 26, 2020
3db983f
tests/diff: use os.path.join instead of implying separator
Jan 26, 2020
b251682
diff: document diffables_from_output
Jan 26, 2020
a4ade13
diff: add JSON output
Jan 26, 2020
bd1adf2
diff: add CLI output
Jan 26, 2020
fe4eba5
diff: handle exceptions
Jan 26, 2020
6ef90f5
diff: add option to display checksums
Jan 27, 2020
bb22acd
diff: enable using diff when there is no cache
Jan 27, 2020
27ada02
scripts: update completion scripts for `diff`
Jan 27, 2020
e6a957a
tests/diff: add directory to no_cache_entry
Jan 27, 2020
10c0ff2
diff: remove extra sorting
Jan 27, 2020
9d14acd
:nail_care: remove excessive parentheses
Jan 27, 2020
9171b9e
:nail_care: add reference explaining git revisions
Jan 27, 2020
2370cb1
diff: use logger instead of print
Jan 27, 2020
fced171
diff: --json -> --show-json
Jan 27, 2020
423a616
diff: make --checksums work with --json-show
Jan 27, 2020
a12d81f
tests/diff: fix windows compat issues
Jan 27, 2020
3c6b52b
diff: improve RevError message
Jan 27, 2020
7f6324e
diff: remove --target
Jan 27, 2020
f41ceb5
tests: adjust doc
Jan 27, 2020
76f5514
refactor
Jan 29, 2020
8e9d814
diff: remove deadcode, replace diffable with dicts
Jan 29, 2020
debf6cb
scm: dead code
Jan 30, 2020
e501549
diff: add a summary at the bottom
Jan 30, 2020
e9bd959
tests/diff: don't care about sorted jsons
Jan 30, 2020
2c22957
wording: update command help
Jan 30, 2020
cfefde6
tests/diff: order dictionary (second try)
Jan 30, 2020
2e77473
Restyled by black
restyled-commits Jan 30, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 118 additions & 126 deletions dvc/command/diff.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import argparse
import json
import logging

import humanize
import inflect
from funcy import compact
import colorama

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import DvcException


Expand All @@ -15,156 +13,150 @@

class CmdDiff(CmdBase):
@staticmethod
def _print_size(size):
if size < 0:
change = "decreased by {}"
elif size > 0:
change = "increased by {}"
else:
change = "not changed"
natur_size = humanize.naturalsize(abs(size))
return change.format(natur_size)
def _format(diff):
"""
Given a diff structure, generate a string of paths separated
by new lines and grouped together by their state.

A group's header is colored and its entries are sorted to enhance
readability, for example:

Added:
another_file.txt
backup.tar
dir/
dir/1

An example of a diff formatted when entries contain checksums:

Added:
d3b07384 foo

Modified:
c157a790..f98bf6f1 bar

If a group has no entries, it won't be included in the result.

At the bottom, include a summary with the number of files per state.
"""

def _digest(checksum):
    """Return a short, display-friendly form of an entry's checksum.

    A plain string checksum is truncated to its first 8 characters.
    Anything else is treated as a mapping with ``"old"`` and ``"new"``
    checksums (a modified entry) and rendered as ``<old[:8]>..<new[:8]>``.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are still handled as plain checksums instead of being
    # indexed like a mapping.
    if isinstance(checksum, str):
        return checksum[:8]
    return "{}..{}".format(checksum["old"][:8], checksum["new"][:8])

colors = {
"added": colorama.Fore.GREEN,
"modified": colorama.Fore.YELLOW,
"deleted": colorama.Fore.RED,
}

summary = {}
groups = []

for state in ["added", "deleted", "modified"]:
summary[state] = 0
entries = diff[state]

if not entries:
continue

content = []

for entry in entries:
path = entry["path"]
checksum = entry.get("checksum")
summary[state] += 1 if not path.endswith("/") else 0
content.append(
"{space}{checksum}{separator}{path}".format(
space=" ",
checksum=_digest(checksum) if checksum else "",
separator=" " if checksum else "",
path=entry["path"],
)
)

groups.append(
"{color}{header}{nc}:\n{content}".format(
color=colors[state],
header=state.capitalize(),
nc=colorama.Fore.RESET,
content="\n".join(content),
)
)

@staticmethod
def _get_md5_string(sign, file_name, checksum):
sample_msg = ""
if file_name:
sample_msg = "{}{} with md5 {}\n"
sample_msg = sample_msg.format(sign, file_name, checksum)
return sample_msg

@classmethod
def _get_dir_changes(cls, dct):
import dvc.repo.diff as diff

engine = inflect.engine()
changes_msg = (
"{} {} untouched, {} {} modified, {} {} added, "
"{} {} deleted, size was {}"
groups.append(
"summary: added ({added}), deleted ({deleted}),"
" modified ({modified})".format_map(summary)
)
changes_msg = changes_msg.format(
dct[diff.DIFF_IDENT],
engine.plural("file", dct[diff.DIFF_IDENT]),
dct[diff.DIFF_CHANGE],
engine.plural("file", dct[diff.DIFF_CHANGE]),
dct[diff.DIFF_NEW],
engine.plural("file", dct[diff.DIFF_NEW]),
dct[diff.DIFF_DEL],
engine.plural("file", dct[diff.DIFF_DEL]),
cls._print_size(dct[diff.DIFF_SIZE]),
)
return changes_msg

@classmethod
def _get_file_changes(cls, dct):
import dvc.repo.diff as diff

if (
dct.get(diff.DIFF_OLD_FILE)
and dct.get(diff.DIFF_NEW_FILE)
and dct[diff.DIFF_SIZE] == 0
):
msg = "file size was not changed"
elif dct.get(diff.DIFF_NEW_FILE):
msg = "added file with size {}".format(
humanize.naturalsize(dct[diff.DIFF_SIZE])
)
elif dct.get(diff.DIFF_OLD_FILE):
msg = "deleted file with size {}".format(
humanize.naturalsize(abs(dct[diff.DIFF_SIZE]))
)
else:
msg = "file was modified, file size {}".format(
cls._print_size(dct[diff.DIFF_SIZE])
)
return msg

@classmethod
def _get_royal_changes(cls, dct):
import dvc.repo.diff as diff
return "\n\n".join(groups)

if dct[diff.DIFF_SIZE] != diff.DIFF_SIZE_UNKNOWN:
if dct.get("is_dir"):
return cls._get_dir_changes(dct)
else:
return cls._get_file_changes(dct)
return "size is ?"
def run(self):
try:
diff = self.repo.diff(self.args.a_ref, self.args.b_ref)

@classmethod
def _show(cls, diff_dct):
import dvc.repo.diff as diff
if not any(diff.values()):
return 0

msg = "dvc diff from {} to {}".format(
diff_dct[diff.DIFF_A_REF], diff_dct[diff.DIFF_B_REF]
)
if diff_dct.get(diff.DIFF_EQUAL):
logger.info(msg)
return
for dct in diff_dct[diff.DIFF_LIST]:
msg += "\n\ndiff for '{}'\n".format(dct[diff.DIFF_TARGET])
msg += cls._get_md5_string(
"-",
dct.get(diff.DIFF_OLD_FILE),
dct.get(diff.DIFF_OLD_CHECKSUM),
)
msg += cls._get_md5_string(
"+",
dct.get(diff.DIFF_NEW_FILE),
dct.get(diff.DIFF_NEW_CHECKSUM),
)
msg += "\n"
msg += cls._get_royal_changes(dct)
logger.info(msg)
return msg
if not self.args.checksums:
for _, entries in diff.items():
for entry in entries:
del entry["checksum"]

if self.args.show_json:
res = json.dumps(diff)
else:
res = self._format(diff)

logger.info(res)

def run(self):
try:
msg = self.repo.diff(
self.args.a_ref, target=self.args.target, b_ref=self.args.b_ref
)
self._show(msg)
except DvcException:
msg = "failed to get 'diff {}'"
args = " ".join(
compact([self.args.target, self.args.a_ref, self.args.b_ref])
)
msg = msg.format(args)
logger.exception(msg)
logger.exception("failed to get diff")
return 1
return 0


def add_parser(subparsers, parent_parser):
DIFF_DESCRIPTION = (
"Show diff of a data file or a directory that is under DVC control.\n"
"Some basic statistics summary, how many files were deleted/changed."
"Compare two different versions of your DVC project (tracked by Git)"
" and shows a list of paths grouped in the following categories:"
" added, modified, or deleted."
)
DIFF_HELP = "Show a diff of a DVC controlled data file or a directory."
diff_parser = subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(DIFF_DESCRIPTION, "diff"),
help=DIFF_HELP,
help=DIFF_DESCRIPTION,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
diff_parser.add_argument(
"-t",
"--target",
"a_ref",
help=(
"Source path to a data file or directory. Default None. "
"If not specified, compares all files and directories "
"that are under DVC control in the current working space."
"Git reference to the old version that you want to compare"
" (defaults to HEAD)"
),
)
diff_parser.add_argument(
"a_ref", help="Git reference from which diff calculates"
nargs="?",
default="HEAD",
)
diff_parser.add_argument(
"b_ref",
help=(
"Git reference until which diff calculates, if omitted "
"diff shows the difference between current HEAD and a_ref"
"Git reference to the new version that you want to compare."
" (defaults to the working tree)"
),
nargs="?",
)
diff_parser.add_argument(
"--show-json",
help="Format the output into a JSON",
action="store_true",
default=False,
)
diff_parser.add_argument(
"--checksums",
help="Display checksums for each entry",
action="store_true",
default=False,
)
diff_parser.set_defaults(func=CmdDiff)
Loading