-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] dependency: fine grained (user cmd filter) #4363
Changes from all commits
a4fcb84
f1f7d54
ef27b35
b8bb670
6269f44
dae2d46
fc2d712
c67d0b8
f16b9ae
e58ae37
33b4e28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,9 @@ | |
import math | ||
import os | ||
import re | ||
import subprocess | ||
import sys | ||
import tempfile | ||
import time | ||
|
||
import colorama | ||
|
@@ -43,8 +45,10 @@ def _fobj_md5(fobj, hash_md5, binary, progress_func=None): | |
progress_func(len(data)) | ||
|
||
|
||
def file_md5(fname, tree=None): | ||
""" get the (md5 hexdigest, md5 digest) of a file """ | ||
def file_md5(fname, tree=None, cmd=None): | ||
""" | ||
Returns (md5_hexdigest, md5_digest) of `cmd file` (default: `cmd=cat`) | ||
""" | ||
from dvc.progress import Tqdm | ||
from dvc.istextfile import istextfile | ||
|
||
|
@@ -58,6 +62,21 @@ def file_md5(fname, tree=None): | |
open_func = open | ||
|
||
if exists_func(fname): | ||
filtered = None | ||
if cmd: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. not sure if there are aspects of |
||
p = subprocess.Popen( | ||
cmd.split() + [fname], | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE, | ||
) | ||
out, err = p.communicate() | ||
if p.returncode != 0: | ||
logger.error("filtering:%s %s", cmd, fname) | ||
raise RuntimeError(err) | ||
with tempfile.NamedTemporaryFile(delete=False) as fobj: | ||
logger.debug("filtering:%s %s > %s", cmd, fname, fobj.name) | ||
fobj.write(out) | ||
fname = filtered = fobj.name | ||
hash_md5 = hashlib.md5() | ||
binary = not istextfile(fname, tree=tree) | ||
size = stat_func(fname).st_size | ||
|
@@ -80,6 +99,10 @@ def file_md5(fname, tree=None): | |
with open_func(fname, "rb") as fobj: | ||
_fobj_md5(fobj, hash_md5, binary, pbar.update) | ||
|
||
if filtered is not None: | ||
from dvc.utils.fs import remove | ||
|
||
remove(filtered) | ||
Comment on lines
+102
to
+105
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if this is required - it may be handled automatically elsewhere (i.e. the entire tmpdir is deleted before exit) |
||
return (hash_md5.hexdigest(), hash_md5.digest()) | ||
|
||
return (None, None) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe the assert is not required (it should be handled by the schema)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other CLI commands can create/load dependencies, skipping the schema. Good to have an assert.