Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix performance issue in v2 tolist. #1418

Merged
merged 1 commit into from
Apr 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,11 +634,12 @@ def _to_list(self, behavior):
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length]
content = self._content._to_list(behavior)
out = [None] * self._length
out = self._content._getitem_range(slice(0, self._length))._to_list(behavior)

for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
if not isvalid:
out[i] = None

return out

def _to_nplike(self, nplike):
Expand Down
9 changes: 5 additions & 4 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,11 +1035,12 @@ def _to_list(self, behavior):
return out

mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)
content = self._content._to_list(behavior)
out = [None] * self._mask.length
out = self._content._getitem_range(slice(0, len(mask)))._to_list(behavior)

for i, isvalid in enumerate(mask):
if isvalid:
out[i] = content[i]
if not isvalid:
out[i] = None

return out

def _to_nplike(self, nplike):
Expand Down
7 changes: 2 additions & 5 deletions src/awkward/_v2/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,11 +1221,8 @@ def _to_list(self, behavior):
return out

index = self._index.raw(numpy)
content = self._content._to_list(behavior)
out = [None] * index.length
for i, ind in enumerate(index):
out[i] = content[ind]
return out
nextcontent = self._content._carry(ak._v2.index.Index(index), False)
return nextcontent._to_list(behavior)

def _to_nplike(self, nplike):
index = self._index._to_nplike(nplike)
Expand Down
16 changes: 11 additions & 5 deletions src/awkward/_v2/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1653,11 +1653,17 @@ def _to_list(self, behavior):
return out

index = self._index.raw(numpy)
content = self._content._to_list(behavior)
out = [None] * len(index)
for i, ind in enumerate(index):
if ind >= 0:
out[i] = content[ind]
not_missing = index >= 0

nextcontent = self._content._carry(
ak._v2.index.Index(index[not_missing]), False
)
out = nextcontent._to_list(behavior)

for i, isvalid in enumerate(not_missing):
if not isvalid:
out.insert(i, None)

return out

def _to_nplike(self, nplike):
Expand Down
33 changes: 24 additions & 9 deletions src/awkward/_v2/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2062,33 +2062,48 @@ def packed(self):
)

# Convert this node to a Python list, with dedicated branches for when the
# "__array__" parameter is "bytestring" or "string".
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were lost, so superseded and replacement statements appear back
# to back in the body. The hedged notes below mark the apparent pairs;
# confirm against the repository before relying on them.
def _to_list(self, behavior):
starts, stops = self.starts, self.stops
starts_data = starts.raw(numpy)
# stops is truncated to the same number of entries as starts.
stops_data = stops.raw(numpy)[: len(starts_data)]

# A list is non-empty where its start differs from its stop. When no list is
# non-empty, an empty content range (0, 0) is used; otherwise the range runs
# from the smallest start to the largest stop.
nonempty = starts_data != stops_data
if numpy.count_nonzero(nonempty) == 0:
mini, maxi = 0, 0
else:
mini = starts_data.min()
maxi = stops_data.max()

# Rebase the offsets by `mini` so they index into the trimmed content below.
starts_data = starts_data - mini
stops_data = stops_data - mini

# Only the slice(mini, maxi) range of the content is carried forward.
nextcontent = self._content._getitem_range(slice(mini, maxi))

if self.parameter("__array__") == "bytestring":
# NOTE(review): the next two statements look like the superseded versions;
# the `nextcontent` assignment after them appears to replace them -- confirm.
content = ak._v2._util.tobytes(self._content.data)
starts, stops = self.starts, self.stops
content = ak._v2._util.tobytes(nextcontent.data)
out = [None] * starts.length
for i in range(starts.length):
# NOTE(review): first assignment looks superseded by the rebased-offset
# version on the following line -- confirm.
out[i] = content[starts[i] : stops[i]]
out[i] = content[starts_data[i] : stops_data[i]]
return out

elif self.parameter("__array__") == "string":
# NOTE(review): the next two statements look like the superseded versions;
# the `nextcontent` assignment after them appears to replace them -- confirm.
content = ak._v2._util.tobytes(self._content.data)
starts, stops = self.starts, self.stops
content = ak._v2._util.tobytes(nextcontent.data)
out = [None] * starts.length
for i in range(starts.length):
# Each slice of bytes is decoded with the "surrogateescape" error handler.
# NOTE(review): the single-line form looks superseded by the multi-line
# rebased-offset form that follows -- confirm.
out[i] = content[starts[i] : stops[i]].decode(errors="surrogateescape")
out[i] = content[starts_data[i] : stops_data[i]].decode(
errors="surrogateescape"
)
return out

else:
# _to_list_custom gets the first chance; a non-None result is returned as is.
out = self._to_list_custom(behavior)
if out is not None:
return out

# NOTE(review): the next two statements look like the superseded versions;
# the `nextcontent._to_list` call after them appears to replace them -- confirm.
content = self._content._to_list(behavior)
starts, stops = self.starts, self.stops
content = nextcontent._to_list(behavior)
out = [None] * starts.length

for i in range(starts.length):
# NOTE(review): first assignment looks superseded by the rebased-offset
# version on the following line -- confirm.
out[i] = content[starts[i] : stops[i]]
out[i] = content[starts_data[i] : stops_data[i]]
return out

def _to_nplike(self, nplike):
Expand Down
14 changes: 14 additions & 0 deletions src/awkward/_v2/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,13 @@ def fields(self):
"""
return self._layout.fields

# Read-only property; indentation of the body was lost in this extract.
@property
def is_tuple(self):
"""
If True, the top-most record structure has no named fields, i.e. it's a tuple.
"""
# Delegates directly to the `is_tuple` attribute of the wrapped layout.
return self._layout.is_tuple

def _ipython_key_completions_(self):
return self._layout.fields

Expand Down Expand Up @@ -1580,6 +1587,13 @@ def fields(self):
"""
return self._layout.array.fields

# Read-only property; indentation of the body was lost in this extract.
@property
def is_tuple(self):
"""
If True, the top-most record structure has no named fields, i.e. it's a tuple.
"""
# Delegates to `is_tuple` of the array held by the wrapped layout
# (`self._layout.array`), not to the layout object itself.
return self._layout.array.is_tuple

def _ipython_key_completions_(self):
return self._layout.array.fields

Expand Down