
[C++, python] Optimizing indexer for pandas by removing std::vector map_locations

Signed-off-by: Behnam Robatmili <[email protected]>
beroy committed Feb 22, 2024
1 parent 6c737ae commit 6e53ea4
Showing 2 changed files with 24 additions and 7 deletions.
7 changes: 0 additions & 7 deletions apis/python/src/tiledbsoma/reindexer.cc
@@ -57,13 +57,6 @@ void load_reindexer(py::module &m) {
                 size_t length = buffer.shape[0];
                 indexer.map_locations(keys.data(), keys.size(), num_threads);
             })
-        .def(
-            "map_locations",
-            [](IntIndexer& indexer,
-               std::vector<int64_t> keys,
-               int num_threads) {
-                indexer.map_locations(keys.data(), keys.size(), num_threads);
-            })
         // Perform lookup for a large input array of keys and return the looked
         // up value array (passing ownership from C++ to python)
         .def(
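
The dropped overload bound map_locations to std::vector<int64_t>, which makes pybind11 copy the keys out of Python element by element; the surviving overload reads the caller's buffer directly, so NumPy-backed inputs reach the native code without that copy. Below is a minimal sketch of the kind of normalization the Python layer can perform so that one buffer-based overload covers every container the tests exercise; the helper _to_int64_buffer is illustrative, not tiledbsoma's actual code.

import numpy as np
import pandas as pd
import pyarrow as pa


def _to_int64_buffer(keys) -> np.ndarray:
    # Hypothetical helper (not in tiledbsoma): coerce list / numpy / pandas /
    # pyarrow key containers to one contiguous int64 NumPy array, so a single
    # buffer-based native overload of map_locations suffices.
    if isinstance(keys, pa.ChunkedArray):
        keys = pa.concat_arrays(keys.chunks)  # flatten chunks (copies once)
    if isinstance(keys, pa.Array):
        # Copies only when zero-copy is impossible (e.g. nulls present).
        keys = keys.to_numpy(zero_copy_only=False)
    elif isinstance(keys, (pd.Series, pd.api.extensions.ExtensionArray)):
        keys = keys.to_numpy()
    # Plain lists are converted here; int64 ndarrays pass through untouched.
    return np.ascontiguousarray(keys, dtype=np.int64)
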
24 changes: 24 additions & 0 deletions apis/python/tests/test_indexer.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 from tiledbsoma._index_util import tiledbsoma_build_index
@@ -61,6 +62,29 @@ def test_duplicate_key_indexer_error(
             ],
         ),
         (list(range(1, 10000)), list(range(1, 10000))),
+        (np.array(range(1, 10000)), np.array(range(1, 10000))),
+        (pa.array(range(1, 10000)), pa.array(range(1, 10000))),
+        (pd.array(range(1, 10000)), pd.array(range(1, 10000))),
+        (
+            pa.chunked_array(
+                [
+                    list(range(1, 10000)),
+                    list(range(10000, 20000)),
+                    list(range(30000, 40000)),
+                ]
+            ),
+            pa.chunked_array(
+                [
+                    list(range(1, 10000)),
+                    list(range(10000, 20000)),
+                    list(range(30000, 40000)),
+                ]
+            ),
+        ),
+        (
+            pd.Series(list(range(1, 10000)), copy=False),
+            pd.Series(list(range(1, 10000)), copy=False),
+        ),
     ],
 )
 def test_indexer(keys: np.array, lookups: np.array):
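
The new parametrize cases cover NumPy arrays, Arrow arrays, Arrow chunked arrays, pandas extension arrays, and pandas Series. A hedged usage sketch built from this file's imports follows; it assumes the object returned by tiledbsoma_build_index exposes a pandas-style get_indexer, and the build function's exact signature (e.g. an optional context argument) may differ across versions.

import numpy as np
import pyarrow as pa

from tiledbsoma._index_util import tiledbsoma_build_index

# Keys 1..9999, so key k sits at position k - 1.
keys = np.arange(1, 10000, dtype=np.int64)
indexer = tiledbsoma_build_index(keys)

# Lookups may now be any of the container types tested above.
lookups = pa.chunked_array([list(range(1, 100)), list(range(100, 200))])
positions = indexer.get_indexer(lookups)  # assumed pandas-style API
assert (positions == np.arange(199)).all()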
