Skip to content

Commit

Permalink
feat: show similar keys first on KeyInFile error
Browse files Browse the repository at this point in the history
  • Loading branch information
andrzejnovak committed Oct 19, 2020
1 parent 57fafcf commit 0e4deb2
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 75 deletions.
77 changes: 2 additions & 75 deletions uproot4/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@
from uproot4.reading import ReadOnlyFile
from uproot4.reading import ReadOnlyDirectory

from uproot4.exceptions import KeyInFileError

from uproot4.model import Model
from uproot4.model import classname_decode
from uproot4.model import classname_encode
Expand Down Expand Up @@ -209,79 +211,4 @@ def behavior_of(classname):

# Unbind the name ``pkgutil`` so it does not remain an attribute of this module.
del pkgutil


class KeyInFileError(KeyError):
    """
    ``KeyError`` subclass raised when a lookup by key fails in a ``TDirectory``
    or among the ``TBranches`` of a
    :py:class:`~uproot4.behaviors.TBranch.HasBranches` (both expose a Python
    ``Mapping`` interface, i.e. square-bracket item access).

    Because it derives from ``KeyError``, code that expects a ``Mapping`` to
    raise ``KeyError`` for a missing item keeps working, while the message
    carries extra context when it is available:

        * ``because``: an explanatory message
        * ``cycle``: the ROOT cycle number requested, if any
        * ``keys``: a list or partial list of keys that *are* in the object,
          in case of misspelling
        * ``file_path``: a path (or URL) to the file
        * ``object_path``: a path to the object within the ROOT file.
    """

    def __init__(
        self, key, because="", cycle=None, keys=None, file_path=None, object_path=None
    ):
        super(KeyInFileError, self).__init__(key)
        self.key = key
        self.because = because
        self.cycle = cycle
        self.keys = keys
        self.file_path = file_path
        self.object_path = object_path

    def __str__(self):
        # Optional explanation, appended directly after the key.
        reason = " because " + self.because if self.because != "" else ""

        # Optional listing of the keys that do exist, cut off with "..." once
        # the listing grows past 200 characters.
        known = ""
        if self.keys is not None:
            listing = None
            for name in self.keys:
                shown = repr(name)
                listing = shown if listing is None else listing + ", " + shown
                if len(listing) > 200:
                    listing += "..."
                    break
            if listing is None:
                listing = "(none!)"
            known = "\n\n Known keys: {0}\n".format(listing)

        # Optional location lines.
        where_file = ""
        if self.file_path is not None:
            where_file = "\nin file {0}".format(self.file_path)

        where_object = ""
        if self.object_path is not None:
            where_object = "\nin object {0}".format(self.object_path)

        shown_key = repr(self.key)
        tail = (reason, known, where_file, where_object)

        # The headline depends on how the cycle number was specified.
        if self.cycle == "any":
            return """not found: {0} (with any cycle number){1}{2}{3}{4}""".format(
                shown_key, *tail
            )
        if self.cycle is None:
            return """not found: {0}{1}{2}{3}{4}""".format(shown_key, *tail)
        return """not found: {0} with cycle {1}{2}{3}{4}{5}""".format(
            shown_key, self.cycle, *tail
        )


from uproot4._util import no_filter
50 changes: 50 additions & 0 deletions uproot4/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,53 @@ def awkward_form_of_iter(awkward1, form):
)
else:
raise RuntimeError("unrecognized form: {0}".format(type(form)))

def damerau_levenshtein(a, b, ratio=False):
    """
    Modified Damerau-Levenshtein distance between two strings.

    Edit costs: insertion and deletion cost 1, substituting a character for
    the same letter in a different case costs 0.5, any other substitution
    costs 2, swapping two adjacent characters costs 1 (1.5 if the swap also
    changes capitalization).

    Parameters
    ----------
    a : str
    b : str
    ratio : bool, optional, default False
        If True, return the similarity ratio
        ``(len(a) + len(b) - distance) / (len(a) + len(b))`` instead of the
        distance. Two empty strings are treated as identical (ratio 1.0).

    Returns
    -------
    int or float
        The modified Damerau-Levenshtein distance, or the similarity ratio
        if ``ratio`` is True.
    """
    len_a, len_b = len(a), len(b)

    # M[i][j] is the distance between the prefixes a[:i] and b[:j].
    M = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    for i in range(len_a + 1):
        M[i][0] = i
    for j in range(len_b + 1):
        M[0][j] = j

    for i in range(1, len_a + 1):
        for j in range(1, len_b + 1):
            if a[i - 1] == b[j - 1]:  # Same char
                cost = 0
            elif a[i - 1].lower() == b[j - 1].lower():  # Same if lowered
                cost = 0.5
            else:  # Different char
                cost = 2
            M[i][j] = min(
                M[i - 1][j] + 1,  # Addition
                M[i][j - 1] + 1,  # Removal
                M[i - 1][j - 1] + cost,  # Substitution
            )

            # Transposition: the last two characters of each prefix are the
            # same pair in swapped order (compared case-insensitively).
            if (
                i > 1
                and j > 1
                and a[i - 1].lower() == b[j - 2].lower()
                and a[i - 2].lower() == b[j - 1].lower()
            ):
                if a[i - 1] == b[j - 2] and a[i - 2] == b[j - 1]:
                    # Transpose only
                    M[i][j] = min(M[i][j], M[i - 2][j - 2] + 1)
                else:
                    # Transpose and capitalization
                    M[i][j] = min(M[i][j], M[i - 2][j - 2] + 1.5)

    distance = M[len_a][len_b]
    if not ratio:
        return distance

    total = len_a + len_b
    if total == 0:
        # Both strings are empty: identical, and nothing to divide by.
        return 1.0
    # float() keeps this a true division on Python 2 as well.
    return (total - distance) / float(total)
79 changes: 79 additions & 0 deletions uproot4/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import numpy as np
from uproot4._util import damerau_levenshtein

class KeyInFileError(KeyError):
    """
    Exception raised by attempts to find ROOT objects in ``TDirectories``
    or ``TBranches`` in :py:class:`~uproot4.behaviors.TBranch.HasBranches`, which
    both have a Python ``Mapping`` interface (square bracket syntax to extract
    items).

    This exception descends from Python's ``KeyError``, so it can be used in
    the normal way by interfaces that expect a missing item in a ``Mapping``
    to raise ``KeyError``, but it provides more information, depending on
    availability:

        * ``because``: an explanatory message
        * ``cycle``: the ROOT cycle number requested, if any
        * ``keys``: a list or partial list of keys that *are* in the object, in case
          of misspelling; the message lists them most-similar-first
        * ``file_path``: a path (or URL) to the file
        * ``object_path``: a path to the object within the ROOT file.
    """

    def __init__(
        self, key, because="", cycle=None, keys=None, file_path=None, object_path=None
    ):
        super(KeyInFileError, self).__init__(key)
        self.key = key
        self.because = because
        self.cycle = cycle
        self.keys = keys
        self.file_path = file_path
        self.object_path = object_path

    def __str__(self):
        if self.because == "":
            because = ""
        else:
            because = " because " + self.because

        with_keys = ""
        if self.keys is not None:
            # Work on a copy: rendering the message must not modify
            # ``self.keys`` or change the type of its elements.
            candidates = list(self.keys)
            try:
                # Stable sort, so equally similar keys keep their given order.
                candidates = sorted(
                    candidates,
                    key=lambda known: damerau_levenshtein(self.key, known),
                )
            except (TypeError, AttributeError):
                # Keys that cannot be compared as strings: keep the given
                # order rather than fail while formatting an error message.
                pass

            to_show = None
            for key in candidates:
                if to_show is None:
                    to_show = repr(key)
                else:
                    to_show += ", " + repr(key)
                # Cut the listing off once it grows past 200 characters.
                if len(to_show) > 200:
                    to_show += "..."
                    break
            if to_show is None:
                to_show = "(none!)"
            with_keys = "\n\n Available keys: {0}\n".format(to_show)

        in_file = ""
        if self.file_path is not None:
            in_file = "\nin file {0}".format(self.file_path)

        in_object = ""
        if self.object_path is not None:
            in_object = "\nin object {0}".format(self.object_path)

        if self.cycle == "any":
            return """not found: {0} (with any cycle number){1}{2}{3}{4}""".format(
                repr(self.key), because, with_keys, in_file, in_object
            )
        elif self.cycle is None:
            return """not found: {0}{1}{2}{3}{4}""".format(
                repr(self.key), because, with_keys, in_file, in_object
            )
        else:
            return """not found: {0} with cycle {1}{2}{3}{4}{5}""".format(
                repr(self.key), self.cycle, because, with_keys, in_file, in_object
            )

0 comments on commit 0e4deb2

Please sign in to comment.