Skip to content

Commit

Permalink
Fix performance issue in v2 tolist. (#1418)
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski authored Apr 14, 2022
1 parent 19d7e39 commit 00d6036
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 27 deletions.
9 changes: 5 additions & 4 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,11 +634,12 @@ def _to_list(self, behavior):
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
content = self._content._to_list(behavior)
out = [None] * self._length
out = self._content._getitem_range(slice(0, self._length))._to_list(behavior)

for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
if not isvalid:
out[i] = None

return out

def _to_nplike(self, nplike):
Expand Down
9 changes: 5 additions & 4 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,11 +1035,12 @@ def _to_list(self, behavior):
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
content = self._content._to_list(behavior)
out = [None] * self._mask.length
out = self._content._getitem_range(slice(0, len(mask)))._to_list(behavior)

for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
if not isvalid:
out[i] = None

return out

def _to_nplike(self, nplike):
Expand Down
7 changes: 2 additions & 5 deletions src/awkward/_v2/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,11 +1221,8 @@ def _to_list(self, behavior):
return out

index = self._index.raw(numpy)
content = self._content._to_list(behavior)
out = [None] * index.length
for i, ind in enumerate(index):
out[i] = content[ind]
return out
nextcontent = self._content._carry(ak._v2.index.Index(index), False)
return nextcontent._to_list(behavior)

def _to_nplike(self, nplike):
index = self._index._to_nplike(nplike)
Expand Down
16 changes: 11 additions & 5 deletions src/awkward/_v2/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1653,11 +1653,17 @@ def _to_list(self, behavior):
return out

index = self._index.raw(numpy)
content = self._content._to_list(behavior)
out = [None] * len(index)
for i, ind in enumerate(index):
if ind >= 0:
out[i] = content[ind]
not_missing = index >= 0

nextcontent = self._content._carry(
ak._v2.index.Index(index[not_missing]), False
)
out = nextcontent._to_list(behavior)

for i, isvalid in enumerate(not_missing):
if not isvalid:
out.insert(i, None)

return out

def _to_nplike(self, nplike):
Expand Down
33 changes: 24 additions & 9 deletions src/awkward/_v2/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2062,33 +2062,48 @@ def packed(self):
)

def _to_list(self, behavior):
starts, stops = self.starts, self.stops
starts_data = starts.raw(numpy)
stops_data = stops.raw(numpy)[: len(starts_data)]

nonempty = starts_data != stops_data
if numpy.count_nonzero(nonempty) == 0:
mini, maxi = 0, 0
else:
mini = starts_data.min()
maxi = stops_data.max()

starts_data = starts_data - mini
stops_data = stops_data - mini

nextcontent = self._content._getitem_range(slice(mini, maxi))

if self.parameter("__array__") == "bytestring":
content = ak._v2._util.tobytes(self._content.data)
starts, stops = self.starts, self.stops
content = ak._v2._util.tobytes(nextcontent.data)
out = [None] * starts.length
for i in range(starts.length):
out[i] = content[starts[i] : stops[i]]
out[i] = content[starts_data[i] : stops_data[i]]
return out

elif self.parameter("__array__") == "string":
content = ak._v2._util.tobytes(self._content.data)
starts, stops = self.starts, self.stops
content = ak._v2._util.tobytes(nextcontent.data)
out = [None] * starts.length
for i in range(starts.length):
out[i] = content[starts[i] : stops[i]].decode(errors="surrogateescape")
out[i] = content[starts_data[i] : stops_data[i]].decode(
errors="surrogateescape"
)
return out

else:
out = self._to_list_custom(behavior)
if out is not None:
return out

content = self._content._to_list(behavior)
starts, stops = self.starts, self.stops
content = nextcontent._to_list(behavior)
out = [None] * starts.length

for i in range(starts.length):
out[i] = content[starts[i] : stops[i]]
out[i] = content[starts_data[i] : stops_data[i]]
return out

def _to_nplike(self, nplike):
Expand Down
14 changes: 14 additions & 0 deletions src/awkward/_v2/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,13 @@ def fields(self):
"""
return self._layout.fields

@property
def is_tuple(self):
    """
    Whether the top-most record structure is a tuple, i.e. a record with
    no named fields.

    The value is read from `is_tuple` on this object's layout.
    """
    layout = self._layout
    return layout.is_tuple

def _ipython_key_completions_(self):
return self._layout.fields

Expand Down Expand Up @@ -1580,6 +1587,13 @@ def fields(self):
"""
return self._layout.array.fields

@property
def is_tuple(self):
    """
    Whether the top-most record structure is a tuple, i.e. a record with
    no named fields.

    The value is read from `is_tuple` on the array held by this object's
    layout.
    """
    array = self._layout.array
    return array.is_tuple

def _ipython_key_completions_(self):
return self._layout.array.fields

Expand Down

0 comments on commit 00d6036

Please sign in to comment.