Skip to content

Commit

Permalink
Merge pull request #3229 from mroutis/enhance-diff
Browse files Browse the repository at this point in the history
diff: reimplement interface and tests from scratch
  • Loading branch information
efiop authored Jan 30, 2020
2 parents b78ef78 + 673afe2 commit fd1e6f4
Show file tree
Hide file tree
Showing 8 changed files with 468 additions and 705 deletions.
245 changes: 119 additions & 126 deletions dvc/command/diff.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import argparse
import json
import logging
import os

import humanize
import inflect
from funcy import compact
import colorama

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import DvcException


Expand All @@ -15,156 +14,150 @@

class CmdDiff(CmdBase):
@staticmethod
def _print_size(size):
    """Describe a signed size delta as a short phrase.

    A positive ``size`` yields "increased by <n>", a negative one
    "decreased by <n>", and anything else "not changed". The magnitude is
    rendered with ``humanize.naturalsize`` on the absolute value.
    """
    if size > 0:
        template = "increased by {}"
    elif size < 0:
        template = "decreased by {}"
    else:
        # No placeholder here, so the formatted magnitude is simply dropped.
        template = "not changed"
    return template.format(humanize.naturalsize(abs(size)))
def _format(diff):
"""
Given a diff structure, generate a string of paths separated
by new lines and grouped together by their state.
A group's header is colored and its entries are sorted to enhance
readability, for example:
Added:
another_file.txt
backup.tar
dir/
dir/1
An example of a diff formatted when entries contain checksums:
Added:
d3b07384 foo
Modified:
c157a790..f98bf6f1 bar
If a group has no entries, it won't be included in the result.
At the bottom, include a summary with the number of files per state.
"""

def _digest(checksum):
if type(checksum) is str:
return checksum[0:8]
return "{}..{}".format(checksum["old"][0:8], checksum["new"][0:8])

colors = {
"added": colorama.Fore.GREEN,
"modified": colorama.Fore.YELLOW,
"deleted": colorama.Fore.RED,
}

summary = {}
groups = []

for state in ["added", "deleted", "modified"]:
summary[state] = 0
entries = diff[state]

if not entries:
continue

content = []

for entry in entries:
path = entry["path"]
checksum = entry.get("checksum")
summary[state] += 1 if not path.endswith(os.sep) else 0
content.append(
"{space}{checksum}{separator}{path}".format(
space=" ",
checksum=_digest(checksum) if checksum else "",
separator=" " if checksum else "",
path=entry["path"],
)
)

groups.append(
"{color}{header}{nc}:\n{content}".format(
color=colors[state],
header=state.capitalize(),
nc=colorama.Fore.RESET,
content="\n".join(content),
)
)

@staticmethod
def _get_md5_string(sign, file_name, checksum):
sample_msg = ""
if file_name:
sample_msg = "{}{} with md5 {}\n"
sample_msg = sample_msg.format(sign, file_name, checksum)
return sample_msg

@classmethod
def _get_dir_changes(cls, dct):
import dvc.repo.diff as diff

engine = inflect.engine()
changes_msg = (
"{} {} untouched, {} {} modified, {} {} added, "
"{} {} deleted, size was {}"
groups.append(
"summary: added ({added}), deleted ({deleted}),"
" modified ({modified})".format_map(summary)
)
changes_msg = changes_msg.format(
dct[diff.DIFF_IDENT],
engine.plural("file", dct[diff.DIFF_IDENT]),
dct[diff.DIFF_CHANGE],
engine.plural("file", dct[diff.DIFF_CHANGE]),
dct[diff.DIFF_NEW],
engine.plural("file", dct[diff.DIFF_NEW]),
dct[diff.DIFF_DEL],
engine.plural("file", dct[diff.DIFF_DEL]),
cls._print_size(dct[diff.DIFF_SIZE]),
)
return changes_msg

@classmethod
def _get_file_changes(cls, dct):
    """Summarise the change recorded for a single file entry.

    Reads the old-file, new-file and size-delta entries of ``dct`` (keyed
    by the constants in ``dvc.repo.diff``) and returns a one-line message.

    NOTE(review): when both old and new files are present but the size
    delta is non-zero, the "added file" branch is taken — confirm that
    this precedence is intended.
    """
    import dvc.repo.diff as diff

    had_old = dct.get(diff.DIFF_OLD_FILE)
    has_new = dct.get(diff.DIFF_NEW_FILE)
    delta = dct[diff.DIFF_SIZE]

    if had_old and has_new and delta == 0:
        return "file size was not changed"
    if has_new:
        return "added file with size {}".format(humanize.naturalsize(delta))
    if had_old:
        # A deletion carries a negative delta; report its magnitude.
        return "deleted file with size {}".format(
            humanize.naturalsize(abs(delta))
        )
    return "file was modified, file size {}".format(cls._print_size(delta))

@classmethod
def _get_royal_changes(cls, dct):
import dvc.repo.diff as diff
return "\n\n".join(groups)

if dct[diff.DIFF_SIZE] != diff.DIFF_SIZE_UNKNOWN:
if dct.get("is_dir"):
return cls._get_dir_changes(dct)
else:
return cls._get_file_changes(dct)
return "size is ?"
def run(self):
try:
diff = self.repo.diff(self.args.a_ref, self.args.b_ref)

@classmethod
def _show(cls, diff_dct):
import dvc.repo.diff as diff
if not any(diff.values()):
return 0

msg = "dvc diff from {} to {}".format(
diff_dct[diff.DIFF_A_REF], diff_dct[diff.DIFF_B_REF]
)
if diff_dct.get(diff.DIFF_EQUAL):
logger.info(msg)
return
for dct in diff_dct[diff.DIFF_LIST]:
msg += "\n\ndiff for '{}'\n".format(dct[diff.DIFF_TARGET])
msg += cls._get_md5_string(
"-",
dct.get(diff.DIFF_OLD_FILE),
dct.get(diff.DIFF_OLD_CHECKSUM),
)
msg += cls._get_md5_string(
"+",
dct.get(diff.DIFF_NEW_FILE),
dct.get(diff.DIFF_NEW_CHECKSUM),
)
msg += "\n"
msg += cls._get_royal_changes(dct)
logger.info(msg)
return msg
if not self.args.checksums:
for _, entries in diff.items():
for entry in entries:
del entry["checksum"]

if self.args.show_json:
res = json.dumps(diff)
else:
res = self._format(diff)

logger.info(res)

def run(self):
try:
msg = self.repo.diff(
self.args.a_ref, target=self.args.target, b_ref=self.args.b_ref
)
self._show(msg)
except DvcException:
msg = "failed to get 'diff {}'"
args = " ".join(
compact([self.args.target, self.args.a_ref, self.args.b_ref])
)
msg = msg.format(args)
logger.exception(msg)
logger.exception("failed to get diff")
return 1
return 0


def add_parser(subparsers, parent_parser):
DIFF_DESCRIPTION = (
"Show diff of a data file or a directory that is under DVC control.\n"
"Some basic statistics summary, how many files were deleted/changed."
"Compare two different versions of your DVC project (tracked by Git)"
" and shows a list of paths grouped in the following categories:"
" added, modified, or deleted."
)
DIFF_HELP = "Show a diff of a DVC controlled data file or a directory."
diff_parser = subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(DIFF_DESCRIPTION, "diff"),
help=DIFF_HELP,
help=DIFF_DESCRIPTION,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
diff_parser.add_argument(
"-t",
"--target",
"a_ref",
help=(
"Source path to a data file or directory. Default None. "
"If not specified, compares all files and directories "
"that are under DVC control in the current working space."
"Git reference to the old version that you want to compare"
" (defaults to HEAD)"
),
)
diff_parser.add_argument(
"a_ref", help="Git reference from which diff calculates"
nargs="?",
default="HEAD",
)
diff_parser.add_argument(
"b_ref",
help=(
"Git reference until which diff calculates, if omitted "
"diff shows the difference between current HEAD and a_ref"
"Git reference to the new version that you want to compare."
" (defaults to the working tree)"
),
nargs="?",
)
diff_parser.add_argument(
"--show-json",
help="Format the output into a JSON",
action="store_true",
default=False,
)
diff_parser.add_argument(
"--checksums",
help="Display checksums for each entry",
action="store_true",
default=False,
)
diff_parser.set_defaults(func=CmdDiff)
Loading

0 comments on commit fd1e6f4

Please sign in to comment.