-
Notifications
You must be signed in to change notification settings - Fork 83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Drop uproot3 for uproot4 for writing ROOT files #1567
Conversation
I'm a bit stuck as I'm having some trouble understanding why the fllowing traceback isn't finding the directories =================================== FAILURES ===================================
______________________ test_import_roundtrip[example-one] ______________________
self = <[AttributeError("'MemmapSource' object has no attribute '_fallback'") raised in repr()] MemmapSource object at 0x7fe0b4a33590>
def _open(self):
try:
> self._file = numpy.memmap(self._file_path, dtype=self._dtype, mode="r")
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:113:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
subtype = <class 'numpy.memmap'>
filename = '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
dtype = dtype('uint8'), mode = 'r', offset = 0, shape = None, order = 'C'
def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
shape=None, order='C'):
# Import here to minimize 'import numpy' overhead
import mmap
import os.path
try:
mode = mode_equivalents[mode]
except KeyError:
if mode not in valid_filemodes:
raise ValueError("mode must be one of %s" %
(valid_filemodes + list(mode_equivalents.keys())))
if mode == 'w+' and shape is None:
raise ValueError("shape must be given")
if hasattr(filename, 'read'):
f_ctx = contextlib_nullcontext(filename)
else:
> f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')
E FileNotFoundError: [Errno 2] No such file or directory: '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/numpy/core/memmap.py:225: FileNotFoundError
During handling of the above exception, another exception occurred:
self = <uproot.source.file.FileResource object at 0x7fe0b4a33650>
file_path = '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
def __init__(self, file_path):
self._file_path = file_path
try:
> self._file = open(self._file_path, "rb")
E FileNotFoundError: [Errno 2] No such file or directory: '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:37: FileNotFoundError
During handling of the above exception, another exception occurred:
tmpdir = local('/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0')
toplvl = 'validation/xmlimport_input/config/example.xml'
basedir = 'validation/xmlimport_input/'
@pytest.mark.parametrize(
'toplvl, basedir',
[
(
'validation/xmlimport_input/config/example.xml',
'validation/xmlimport_input/',
),
(
'validation/xmlimport_input2/config/example.xml',
'validation/xmlimport_input2',
),
(
'validation/xmlimport_input3/config/examples/example_ShapeSys.xml',
'validation/xmlimport_input3',
),
],
ids=['example-one', 'example-two', 'example-three'],
)
def test_import_roundtrip(tmpdir, toplvl, basedir):
parsed_xml_before = pyhf.readxml.parse(toplvl, basedir)
spec = {
'channels': parsed_xml_before['channels'],
'parameters': parsed_xml_before['measurements'][0]['config']['parameters'],
}
pdf_before = pyhf.Model(spec, poi_name='SigXsecOverSM')
tmpconfig = tmpdir.mkdir('config')
tmpdata = tmpdir.mkdir('data')
tmpxml = tmpdir.join('FitConfig.xml')
tmpxml.write(
pyhf.writexml.writexml(
parsed_xml_before,
tmpconfig.strpath,
tmpdata.strpath,
Path(tmpdir.strpath).joinpath('FitConfig'),
).decode('utf-8')
)
> parsed_xml_after = pyhf.readxml.parse(tmpxml.strpath, tmpdir.strpath)
tests/test_validation.py:846:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
src/pyhf/readxml.py:347: in parse
ET.parse(str(Path(rootdir).joinpath(inp))), rootdir, track_progress
src/pyhf/readxml.py:228: in process_channel
parsed_data = process_data(data[0], rootdir, inputfile, histopath)
src/pyhf/readxml.py:212: in process_data
data, _ = import_root_histogram(rootdir, inputfile, histopath, histoname)
src/pyhf/readxml.py:61: in import_root_histogram
f = uproot.open(fullpath)
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/reading.py:156: in open
**options # NOTE: a comma after **options breaks Python 2
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/reading.py:589: in __init__
file_path, **self._options # NOTE: a comma after **options breaks Python 2
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:109: in __init__
self._open()
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:120: in _open
self._file_path, **opts # NOTE: a comma after **opts breaks Python 2
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:257: in __init__
self._open()
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:263: in _open
for x in uproot._util.range(self._num_workers)
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:263: in <listcomp>
for x in uproot._util.range(self._num_workers)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <uproot.source.file.FileResource object at 0x7fe0b4a33650>
file_path = '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
def __init__(self, file_path):
self._file_path = file_path
try:
self._file = open(self._file_path, "rb")
except uproot._util._FileNotFoundError:
> raise uproot._util._file_not_found(file_path)
E FileNotFoundError: file not found
E
E '/tmp/pytest-of-runner/pytest-0/test_import_roundtrip_example_0/()'
E
E Files may be specified as:
E * str/bytes: relative or absolute filesystem path or URL, without any colons
E other than Windows drive letter or URL schema.
E Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
E * str/bytes: same with an object-within-ROOT path, separated by a colon.
E Example: "rel/file.root:tdirectory/ttree"
E * pathlib.Path: always interpreted as a filesystem path or URL only (no
E object-within-ROOT path), regardless of whether there are any colons.
E Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")
E
E Functions that accept many files (uproot.iterate, etc.) also allow:
E * glob syntax in str/bytes and pathlib.Path.
E Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
E * dict: keys are filesystem paths, values are objects-within-ROOT paths.
E Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
E * already-open TTree objects.
E * iterables of the above.
/opt/hostedtoolcache/Python/3.7.11/x64/lib/python3.7/site-packages/uproot/source/file.py:39: FileNotFoundError If I run this locally $ python -m pytest -sx tests/test_validation.py -k test_import_roundtrip[example-one]
...
self = <uproot.source.file.FileResource object at 0x7f7057103820>, file_path = '/tmp/pytest-of-feickert/pytest-42/test_import_roundtrip_example_0/()'
def __init__(self, file_path):
self._file_path = file_path
try:
self._file = open(self._file_path, "rb")
except uproot._util._FileNotFoundError:
> raise uproot._util._file_not_found(file_path)
E FileNotFoundError: file not found
E
E '/tmp/pytest-of-feickert/pytest-42/test_import_roundtrip_example_0/()'
E
E Files may be specified as:
E * str/bytes: relative or absolute filesystem path or URL, without any colons
E other than Windows drive letter or URL schema.
E Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
E * str/bytes: same with an object-within-ROOT path, separated by a colon.
E Example: "rel/file.root:tdirectory/ttree"
E * pathlib.Path: always interpreted as a filesystem path or URL only (no
E object-within-ROOT path), regardless of whether there are any colons.
E Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")
E
E Functions that accept many files (uproot.iterate, etc.) also allow:
E * glob syntax in str/bytes and pathlib.Path.
E Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
E * dict: keys are filesystem paths, values are objects-within-ROOT paths.
E Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
E * already-open TTree objects.
E * iterables of the above.
../../../.pyenv/versions/pyhf-CPU/lib/python3.8/site-packages/uproot/source/file.py:39: FileNotFoundError The directory exists
Nothing should have changed in I'll tag @jpivarski in case he can think of something that is obvious that I'm missing, but I'm not expecting you to work on this PR Jim as my guess is that something subtle has changed that I need to fix in |
The first thing I'm noticing is that the filename has parentheses in it. That might be some string formatting (maybe in Uproot) gone awry. This is the error message for "can't find a file at that file path" and I'd be surprised if your filename really did have parentheses in it.
|
@jpivarski Yeah, all the filenames are just
It seems maybe so(?). If I revert the following diff diff --git a/src/pyhf/writexml.py b/src/pyhf/writexml.py
index f1595c57..650c313a 100644
--- a/src/pyhf/writexml.py
+++ b/src/pyhf/writexml.py
@@ -6,9 +6,7 @@
import xml.etree.ElementTree as ET
import numpy as np
-# TODO: Move to uproot4 when ROOT file writing is supported
-import uproot3 as uproot
-from uproot3_methods.classes import TH1
+import uproot
from pyhf.mixins import _ChannelSummaryMixin
@@ -48,11 +46,11 @@ def _make_hist_name(channel, sample, modifier='', prefix='hist', suffix=''):
def _export_root_histogram(histname, data):
- hist = TH1.from_numpy((np.asarray(data), np.arange(len(data) + 1)))
- hist._fName = histname
if histname in _ROOT_DATA_FILE:
raise KeyError(f"Duplicate key {histname} being written.")
- _ROOT_DATA_FILE[histname] = hist
+ _ROOT_DATA_FILE[histname] = np.histogram(
+ np.asarray(data), bins=np.arange(len(data) + 1)
+ )
# https://stackoverflow.com/a/4590052 so that I'm using
passes. I don't think(?) the assignment to Lines 288 to 303 in ce2ffab
|
I wonder if Lines 265 to 268 in ce2ffab
is causing problems. |
Ah seems so. If I add in the following addition $ git diff
diff --git a/src/pyhf/writexml.py b/src/pyhf/writexml.py
index 650c313a..e9391ae8 100644
--- a/src/pyhf/writexml.py
+++ b/src/pyhf/writexml.py
@@ -261,6 +261,7 @@ def build_data(obsspec, channelname):
def build_channel(spec, channelspec, obsspec):
+ print(f"\n{type(_ROOT_DATA_FILE)}: {_ROOT_DATA_FILE._path=}\n")
channel = ET.Element(
'Channel', Name=channelspec['name'], InputFile=_ROOT_DATA_FILE._path
) then with the $ python -m pytest -sx tests/test_validation.py -k test_import_roundtrip[example-one]
...
<class 'uproot.writing.writable.WritableDirectory'>: _ROOT_DATA_FILE._path=()
... If I revert back to $ python -m pytest -sx tests/test_validation.py -k test_import_roundtrip[example-one]
...
<class 'uproot3.write.TFile.TFileRecreate'>: _ROOT_DATA_FILE._path='/tmp/pytest-of-feickert/pytest-54/test_import_roundtrip_example_0/data/data.root'
... where for both Lines 288 to 290 in ce2ffab
|
Debugging note to self: If I print out the Lines 846 to 850 in ce2ffab
and then compare them for $ diff -y /tmp/uproot3_spec.py /tmp/uproot4_spec.py
{ {
"channels": [ "channels": [
{ {
"name": "channel1", "name": "channel1",
"samples": [ "samples": [
{ {
"name": "signal", "name": "signal",
"data": [20.0, 10.0], | "data": [0.0, 0.0],
"modifiers": [ "modifiers": [
{ {
"name": "syst1", "name": "syst1",
"type": "normsys", "type": "normsys",
"data": {"lo": 0.95, "hi": 1.05}, "data": {"lo": 0.95, "hi": 1.05},
}, },
{"name": "SigXsecOverSM", "type": "no {"name": "SigXsecOverSM", "type": "no
], ],
}, },
{ {
"name": "background1", "name": "background1",
"data": [100.0, 0.0], | "data": [1.0, 0.0],
"modifiers": [ "modifiers": [
{"name": "lumi", "type": "lumi", "dat {"name": "lumi", "type": "lumi", "dat
{ {
"name": "staterror_channel1", "name": "staterror_channel1",
"type": "staterror", "type": "staterror",
"data": [5.000000074505806, 0.0], | "data": [2.0, 0.0],
}, },
{ {
"name": "syst2", "name": "syst2",
"type": "normsys", "type": "normsys",
"data": {"lo": 0.95, "hi": 1.05}, "data": {"lo": 0.95, "hi": 1.05},
}, },
], ],
}, },
{ {
"name": "background2", "name": "background2",
"data": [0.0, 100.0], | "data": [1.0, 0.0],
"modifiers": [ "modifiers": [
{"name": "lumi", "type": "lumi", "dat {"name": "lumi", "type": "lumi", "dat
{ {
"name": "staterror_channel1", "name": "staterror_channel1",
"type": "staterror", "type": "staterror",
"data": [0.0, 10.0], | "data": [2.0, 0.0],
}, },
{ {
"name": "syst3", "name": "syst3",
"type": "normsys", "type": "normsys",
"data": {"lo": 0.95, "hi": 1.05}, "data": {"lo": 0.95, "hi": 1.05},
}, },
], ],
}, },
], ],
} }
], ],
"parameters": [ "parameters": [
{ {
"name": "lumi", "name": "lumi",
"auxdata": [1.0], "auxdata": [1.0],
"bounds": [[0.5, 1.5]], "bounds": [[0.5, 1.5]],
"inits": [1.0], "inits": [1.0],
"sigmas": [0.1], "sigmas": [0.1],
"fixed": True, "fixed": True,
}, },
{"name": "SigXsecOverSM", "bounds": [[0.0, 3.0]], "in {"name": "SigXsecOverSM", "bounds": [[0.0, 3.0]], "in
{"name": "syst1", "fixed": True}, {"name": "syst1", "fixed": True},
], ],
} } the only parts that differ are the $ diff /tmp/uproot3_spec.py /tmp/uproot4_spec.py
8c8
< "data": [20.0, 10.0],
---
> "data": [0.0, 0.0],
20c20
< "data": [100.0, 0.0],
---
> "data": [1.0, 0.0],
26c26
< "data": [5.000000074505806, 0.0],
---
> "data": [2.0, 0.0],
37c37
< "data": [0.0, 100.0],
---
> "data": [1.0, 0.0],
43c43
< "data": [0.0, 10.0],
---
> "data": [2.0, 0.0], so I need to look at how those are getting written. From Lines 234 to 253 in ce2ffab
it is clear to me then that how I've written |
Yeah, I wrote it 100% wrong. Don't write code when very tired is the obvious lesson here. |
Codecov Report
@@ Coverage Diff @@
## master #1567 +/- ##
==========================================
- Coverage 97.70% 97.70% -0.01%
==========================================
Files 63 63
Lines 4050 4047 -3
Branches 576 576
==========================================
- Hits 3957 3954 -3
Misses 54 54
Partials 39 39
Flags with carried forward coverage won't be shown. Click here to find out more.
Continue to review full report at Codecov.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great! I just have a few suggestions that could streamline things, nothing critical.
tests/test_export.py
Outdated
|
||
with uproot.recreate(tmp_path.joinpath("test_export_root_histogram.root")) as file: | ||
file["hist"] = pyhf.writexml._ROOT_DATA_FILE["example"] | ||
values, edges = file["hist"].to_numpy() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This literally writes and reads the data back. If you want to keep an Uproot Model of the data for later processing, you could use uproot.to_writable
to turn it into a Model, which is implicitly the first step in writing anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was more just meant as a very simple sanity check that pyhf.writexml._export_root_histogram
is writing what we expect it to into the ROOT file. I see though that
>>> import uproot
>>> import numpy as np
>>> data = np.arange(10)
>>> edges = np.arange(len(data) + 1)
>>> uproot.to_writable((data, edges))
<TH1D (version 3) at 0x7f421bf86df0>
verifies the structure. So in your mind should I add the suggestion that I've placed in pyhf.writexml._export_root_histogram
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's a sanity check, then writing and reading back is a good idea. It would be even more of a sanity check if you did so with different file objects:
with uproot.recreate(tmp_path.joinpath("test_export_root_histogram.root")) as file:
file["hist"] = pyhf.writexml._ROOT_DATA_FILE["example"]
with uproot.open(tmp_path.joinpath("test_export_root_histogram.root")) as file:
values, edges = file["hist"].to_numpy()
That would test more—the whole file structure, and the certainty that these really are bytes on disk. Using the same file
does invoke the system operation to read from the file it's just written to, but with the same file handle in "r+b"
mode and without going back to the beginning of the file and seeking through all the pointers to get to the object again. to_writable
does still less, just turning the 2-tuple of arrays Python object into an Uproot Model Python object, which is what you'd want to do if performance were your goal, rather than sanity-checking.
So it's up to you, depending on what your goal is here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. I've added this in the latest commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the super fast response, @jpivarski! Just one follow up question for you though.
tests/test_export.py
Outdated
|
||
with uproot.recreate(tmp_path.joinpath("test_export_root_histogram.root")) as file: | ||
file["hist"] = pyhf.writexml._ROOT_DATA_FILE["example"] | ||
values, edges = file["hist"].to_numpy() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was more just meant as a very simple sanity check that pyhf.writexml._export_root_histogram
is writing what we expect it to into the ROOT file. I see though that
>>> import uproot
>>> import numpy as np
>>> data = np.arange(10)
>>> edges = np.arange(len(data) + 1)
>>> uproot.to_writable((data, edges))
<TH1D (version 3) at 0x7f421bf86df0>
verifies the structure. So in your mind should I add the suggestion that I've placed in pyhf.writexml._export_root_histogram
?
src/pyhf/writexml.py
Outdated
if histname in _ROOT_DATA_FILE: | ||
raise KeyError(f"Duplicate key {histname} being written.") | ||
_ROOT_DATA_FILE[histname] = hist | ||
_ROOT_DATA_FILE[histname] = (np.asarray(data), np.arange(len(data) + 1)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_ROOT_DATA_FILE[histname] = (np.asarray(data), np.arange(len(data) + 1)) | |
_ROOT_DATA_FILE[histname] = uproot.to_writable( | |
(np.asarray(data), np.arange(len(data) + 1)) | |
) |
@jpivarski given your other comment, this seems better then directly writing, correct? As if this was ever messed up then we'd get a TypeError
raised.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just
writable = uproot.to_writable((np.asarray(data), np.arange(len(data) + 1)))
by itself ensures that the data passed to to_writable
will be writable. It will raise a TypeError if it is not recognized as a writable Python type.
The next step of assigning to the file,
_ROOT_DATA_FILE[histname] = writable
actually writes it to the file. The disk I/O operations are only invoked when this assignment happens, not when the to_writable
function is called.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The disk I/O operations are only invoked when this assignment happens, not when the
to_writable
function is called.
Right. Okay, then it seems what I have here (now) is doing as expected. 👍
when _export_root_histogram does ``` _ROOT_DATA_FILE[histname] = hist_uproot_model ``` this is doing the assignment of `hsitname` as the name
9ba2c7e
to
72fa349
Compare
I bumped the lower bound to |
Description
Resolves #1566
Drop all usages of
uproot3
and only useuproot
v4.1.1+
for all reading and writing of ROOT files.Checklist Before Requesting Reviewer
Before Merging
For the PR Assignees: