Skip to content

Commit

Permalink
resize matches
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Sep 11, 2024
1 parent b15b7ad commit 7a6acba
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 36 deletions.
44 changes: 9 additions & 35 deletions src/akimbo/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,31 +160,6 @@ def get_avro_schema(
return form


def _merge(ind1, ind2, builder):
"""numba jittable left join/merge index finder"""
len2 = len(ind2)
j = 0
for i in ind1:
builder.begin_list()
while True:
if j >= len2:
break
if i > ind2[j]:
# ID not yet found
j += 1
continue
if i < ind2[j]:
# no more entrie
break
# hit
while True:
builder.append(j)
j += 1
if j >= len2 or i != ind2[j]:
break
builder.end_list()


_jitted = [None]


Expand Down Expand Up @@ -224,22 +199,21 @@ def join(
merge = _jitted[0]
else:
merge = _merge
# builder = ak.ArrayBuilder()
# merge(table1[key], table2[key], builder)
# merge_index = builder.snapshot()
# indexed = table2[ak.flatten(merge_index)]
# counts = ak.num(merge_index)

counts, matches = merge(table1[key], table2[key])
counts = np.empty(len(table1), dtype="uint64")
# TODO: the line below over-allocates, can swithch to somehing growable
matches = np.empty(len(table2), dtype="uint64")
# TODO: to_numpy(allow_missong) makes this a bit faster, but is not
# not GPU general
counts, matches, ind = merge(table1[key], table2[key], counts, matches)
matches.resize(int(ind), refcheck=False)
indexed = table2[matches]
listy = ak.unflatten(indexed, counts)
return ak.with_field(table1, listy, colname)


def _merge(ind1, ind2):
def _merge(ind1, ind2, counts, matches):
len2 = len(ind2)
counts = np.empty(len(ind1), dtype="uint32")
matches = np.empty(len2, dtype="uint64")
j = 0
offind = 0
matchind = 0
Expand All @@ -265,4 +239,4 @@ def _merge(ind1, ind2):
counts[offind] = matchind - last
last = matchind
offind += 1
return counts, matches
return counts, matches, matchind
4 changes: 3 additions & 1 deletion src/akimbo/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,9 @@ def to_arrow(cls, data):
@property
def array(self) -> ak.Array:
"""Data as an awkward array"""
return ak.with_name(ak.from_arrow(self.arrow), self._behavior)
if self._behavior:
return ak.with_name(ak.from_arrow(self.arrow), self._behavior)
return ak.from_arrow(self.arrow)

@classmethod
def register_accessor(cls, name, klass):
Expand Down

0 comments on commit 7a6acba

Please sign in to comment.