Skip to content

Commit

Permalink
[python] Reorganize Reindexer bindings into separate file (#2110)
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv authored Feb 6, 2024
1 parent 6a0b7b7 commit 63827e6
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 60 deletions.
1 change: 1 addition & 0 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def run(self):
"tiledbsoma.pytiledbsoma",
[
"src/tiledbsoma/common.cc",
"src/tiledbsoma/reindexer.cc",
"src/tiledbsoma/query_condition.cc",
"src/tiledbsoma/soma_array.cc",
"src/tiledbsoma/soma_object.cc",
Expand Down
62 changes: 2 additions & 60 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <tiledbsoma/tiledbsoma>
#include <tiledbsoma/reindexer/reindexer.h>

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
Expand All @@ -21,6 +20,7 @@ void load_soma_array(py::module &);
void load_soma_object(py::module &);
void load_soma_dataframe(py::module &);
void load_query_condition(py::module &);
void load_reindexer(py::module &);

PYBIND11_MODULE(pytiledbsoma, m) {
py::register_exception<TileDBSOMAError>(m, "SOMAError");
Expand Down Expand Up @@ -89,69 +89,11 @@ PYBIND11_MODULE(pytiledbsoma, m) {
},
"Print TileDB internal statistics. Lifecycle: experimental.");

// Efficient C++ re-indexing (aka hashing unique key values to an index
// between 0 and number of keys - 1) based on khash
py::class_<IntIndexer>(m, "IntIndexer")
.def(py::init<>())
.def(py::init<std::vector<int64_t>&, int>())
.def(
"map_locations",
[](IntIndexer& indexer,
py::array_t<int64_t> keys,
int num_threads) {
auto buffer = keys.request();
int64_t* data = static_cast<int64_t*>(buffer.ptr);
size_t length = buffer.shape[0];
indexer.map_locations(keys.data(), keys.size(), num_threads);
})
.def(
"map_locations",
[](IntIndexer& indexer,
std::vector<int64_t> keys,
int num_threads) {
indexer.map_locations(keys.data(), keys.size(), num_threads);
})
// Perform lookup for a large input array of keys and return the looked
// up value array (passing ownership from C++ to python)
.def(
"get_indexer",
[](IntIndexer& indexer, py::array_t<int64_t> lookups) {
auto input_buffer = lookups.request();
int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
size_t size = input_buffer.shape[0];
auto results = py::array_t<int64_t>(size);
auto results_buffer = results.request();
size_t results_size = results_buffer.shape[0];

int64_t* results_ptr = static_cast<int64_t*>(
results_buffer.ptr);

indexer.lookup(input_ptr, results_ptr, size);
return results;
})
// Perform lookup for a large input array of keys and writes the looked
// up values into previously allocated array (works for the cases in
// which python and R pre-allocate the array)
.def(
"get_indexer",
[](IntIndexer& indexer,
py::array_t<int64_t> lookups,
py::array_t<int64_t>& results) {
auto input_buffer = lookups.request();
int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
size_t size = input_buffer.shape[0];

auto results_buffer = results.request();
int64_t* results_ptr = static_cast<int64_t*>(
results_buffer.ptr);
size_t results_size = input_buffer.shape[0];
indexer.lookup(input_ptr, input_ptr, size);
});

load_soma_array(m);
load_soma_object(m);
load_soma_dataframe(m);
load_query_condition(m);
load_reindexer(m);
}

};
104 changes: 104 additions & 0 deletions apis/python/src/tiledbsoma/reindexer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* @file reindexer.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines the Reindexer bindings.
*/

#include <tiledbsoma/reindexer/reindexer.h>

#include "common.h"

// DENUM(FOO) expands to `.value("FOO", TILEDB_FOO)`: the argument is
// stringified for the first operand and token-pasted onto the `TILEDB_` prefix
// for the second. Presumably meant for chaining entries onto a pybind11 enum
// binding.
// NOTE(review): no use of DENUM is visible in this file -- confirm whether the
// macro is still needed here.
#define DENUM(x) .value(#x, TILEDB_##x)
namespace libtiledbsomacpp {

namespace py = pybind11;
using namespace py::literals;
using namespace tiledbsoma;

/**
 * @brief Register the `IntIndexer` Python class on module `m`.
 *
 * Exposed Python API:
 *  - `IntIndexer()` and `IntIndexer(keys: list[int], num_threads: int)`
 *  - `map_locations(keys, num_threads)` for a NumPy int64 array or a list
 *  - `get_indexer(lookups)` returning a freshly allocated int64 array
 *  - `get_indexer(lookups, results)` writing into a caller-supplied array
 *
 * @param m The pybind11 module that receives the class binding.
 */
void load_reindexer(py::module &m) {
    // Efficient C++ re-indexing (aka hashing unique key values to an index
    // between 0 and number of keys - 1) based on khash
    py::class_<IntIndexer>(m, "IntIndexer")
        .def(py::init<>())
        .def(py::init<std::vector<int64_t>&, int>())
        // Build the index from a NumPy int64 array of keys.
        .def(
            "map_locations",
            [](IntIndexer& indexer,
               py::array_t<int64_t> keys,
               int num_threads) {
                indexer.map_locations(keys.data(), keys.size(), num_threads);
            })
        // Build the index from a Python sequence converted to std::vector.
        .def(
            "map_locations",
            [](IntIndexer& indexer,
               std::vector<int64_t> keys,
               int num_threads) {
                indexer.map_locations(keys.data(), keys.size(), num_threads);
            })
        // Perform lookup for a large input array of keys and return the looked
        // up value array (passing ownership from C++ to python)
        .def(
            "get_indexer",
            [](IntIndexer& indexer, py::array_t<int64_t> lookups) {
                // NOTE(review): the raw pointer access below assumes a 1-D,
                // contiguous int64 array -- confirm against callers.
                auto input_buffer = lookups.request();
                int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
                size_t size = input_buffer.shape[0];

                // Output array has one slot per looked-up key.
                auto results = py::array_t<int64_t>(size);
                auto results_buffer = results.request();
                int64_t* results_ptr = static_cast<int64_t*>(
                    results_buffer.ptr);

                indexer.lookup(input_ptr, results_ptr, size);
                return results;
            })
        // Perform lookup for a large input array of keys and writes the looked
        // up values into previously allocated array (works for the cases in
        // which python and R pre-allocate the array)
        .def(
            "get_indexer",
            [](IntIndexer& indexer,
               py::array_t<int64_t> lookups,
               py::array_t<int64_t>& results) {
                // NOTE(review): the raw pointer access below assumes 1-D,
                // contiguous int64 arrays -- confirm against callers.
                auto input_buffer = lookups.request();
                int64_t* input_ptr = static_cast<int64_t*>(input_buffer.ptr);
                size_t size = input_buffer.shape[0];

                auto results_buffer = results.request();
                int64_t* results_ptr = static_cast<int64_t*>(
                    results_buffer.ptr);

                // Refuse to write past the end of the caller's buffer; this
                // surfaces in Python as a ValueError.
                if (static_cast<size_t>(results_buffer.size) < size) {
                    throw py::value_error(
                        "IntIndexer.get_indexer: results array is smaller "
                        "than lookups array");
                }

                // Write into the caller-provided results buffer (the previous
                // code passed input_ptr twice, clobbering the lookups and
                // leaving results untouched).
                indexer.lookup(input_ptr, results_ptr, size);
            });
}
}  // namespace libtiledbsomacpp

0 comments on commit 63827e6

Please sign in to comment.