Skip to content

Commit

Permalink
Add Arrow - Velox conversion support (facebookincubator#4450)
Browse files Browse the repository at this point in the history
Summary:
This PR introduces PyVelox functions for converting Arrow Arrays to Velox Vectors and vice versa.

Pull Request resolved: facebookincubator#4450

Reviewed By: bikramSingh91

Differential Revision: D46570398

Pulled By: kgpai

fbshipit-source-id: cf0557ad26a568f10866683e59cfa2bd79040579
  • Loading branch information
sanjibansg authored and facebook-github-bot committed Jun 14, 2023
1 parent 5f663d5 commit 8e95993
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build_pyvelox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:
cp -R /host${{ github.workspace }}/.ccache /output/.ccache &&
ccache -s
CIBW_ENVIRONMENT_PASS_LINUX: CCACHE_DIR BUILD_VERSION
CIBW_TEST_EXTRAS: "tests"
CIBW_TEST_COMMAND: "cd {project}/pyvelox && python -m unittest -v"
CIBW_TEST_SKIP: "*macos*"
CCACHE_DIR: "${{ matrix.os != 'macos-11' && '/output' || github.workspace }}/.ccache"
Expand Down
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ python-clean:
DEBUG=1 ${PYTHON_EXECUTABLE} setup.py clean

python-build:
DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=4 ${PYTHON_EXECUTABLE} setup.py develop
DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=4 ${PYTHON_EXECUTABLE} -m pip install -e .$(extras) --verbose

python-test: python-build
python-test:
$(MAKE) python-build extras="[tests]"
DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v
3 changes: 2 additions & 1 deletion pyvelox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ if(VELOX_BUILD_PYTHON_PACKAGE)
include_directories(SYSTEM ${CMAKE_SOURCE_DIR})
add_definitions(-DCREATE_PYVELOX_MODULE -DVELOX_DISABLE_GOOGLETEST)
# Define our Python module:
pybind11_add_module(pyvelox MODULE pyvelox.cpp serde.cpp signatures.cpp)
pybind11_add_module(pyvelox MODULE pyvelox.cpp serde.cpp signatures.cpp
conversion.cpp)
# Link with Velox:
target_link_libraries(
pyvelox
Expand Down
52 changes: 52 additions & 0 deletions pyvelox/conversion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "conversion.h"
#include <velox/vector/arrow/Abi.h>
#include <velox/vector/arrow/Bridge.h>
#include "context.h"

namespace facebook::velox::py {

namespace py = pybind11;

namespace {

/// Scope guard for a structure of the Arrow C data interface (ArrowArray or
/// ArrowSchema). On destruction it invokes the structure's release callback
/// if that callback is still set. Under the Arrow C data interface a consumer
/// that takes a structure over marks the source as released by resetting the
/// callback to null, so a non-null callback means the contents still belong
/// to us and would otherwise leak.
template <typename TArrowStruct>
class ArrowReleaseGuard {
 public:
  explicit ArrowReleaseGuard(TArrowStruct& target) : target_(target) {}

  ArrowReleaseGuard(const ArrowReleaseGuard&) = delete;
  ArrowReleaseGuard& operator=(const ArrowReleaseGuard&) = delete;

  ~ArrowReleaseGuard() {
    if (target_.release != nullptr) {
      target_.release(&target_);
    }
  }

 private:
  TArrowStruct& target_;
};

} // namespace

/// Registers the Arrow <-> Velox conversion functions on module `m`:
///  - `export_to_arrow(vector)` exports a Velox vector through the Arrow C
///    data interface and returns the object produced by
///    `pyarrow.Array._import_from_c`.
///  - `import_from_arrow(array)` asks the given Python object to export itself
///    via `_export_to_c` and returns the result of `importFromArrowAsOwner`.
///
/// @param m Module to add bindings to.
/// @param asModuleLocalDefinitions Not read by this function; only free
/// functions are registered here.
void addConversionBindings(py::module& m, bool asModuleLocalDefinitions) {
  m.def("export_to_arrow", [](VectorPtr& inputVector) {
    // Resolve pyarrow before exporting anything, so that a missing pyarrow
    // installation fails without having created exported Arrow data.
    py::module arrowModule = py::module::import("pyarrow");
    py::object arrayClass = arrowModule.attr("Array");

    // make_unique value-initializes the C structs, so `release` starts out
    // null and the guards below are no-ops until an export has succeeded.
    auto arrowArray = std::make_unique<ArrowArray>();
    auto arrowSchema = std::make_unique<ArrowSchema>();

    // Declared after the unique_ptrs so they run before the structs are
    // freed. They release anything that was exported but not taken over,
    // e.g. when the schema export or the pyarrow call throws.
    ArrowReleaseGuard<ArrowArray> arrayGuard(*arrowArray);
    ArrowReleaseGuard<ArrowSchema> schemaGuard(*arrowSchema);

    auto pool = PyVeloxContext::getSingletonInstance().pool();
    facebook::velox::exportToArrow(inputVector, *arrowArray, pool);
    facebook::velox::exportToArrow(inputVector, *arrowSchema);

    // pyarrow receives the struct addresses as plain integers.
    return arrayClass.attr("_import_from_c")(
        reinterpret_cast<uintptr_t>(arrowArray.get()),
        reinterpret_cast<uintptr_t>(arrowSchema.get()));
  });

  m.def("import_from_arrow", [](py::object inputArrowArray) {
    auto arrowArray = std::make_unique<ArrowArray>();
    auto arrowSchema = std::make_unique<ArrowSchema>();

    // Let the Python object fill both C structs in place.
    inputArrowArray.attr("_export_to_c")(
        reinterpret_cast<uintptr_t>(arrowArray.get()),
        reinterpret_cast<uintptr_t>(arrowSchema.get()));

    auto pool = PyVeloxContext::getSingletonInstance().pool();
    // NOTE(review): importFromArrowAsOwner is expected to take over the
    // contents of both structs (see velox/vector/arrow/Bridge.h), which is
    // why no release guard is installed on this path - confirm before adding
    // one, as a second release would be a double free.
    return importFromArrowAsOwner(*arrowSchema, *arrowArray, pool);
  });
}
} // namespace facebook::velox::py
34 changes: 34 additions & 0 deletions pyvelox/conversion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <pybind11/pybind11.h>

namespace facebook::velox::py {

namespace py = pybind11;

/// Adds bindings for arrow-velox conversion functions to module m.
///
/// Two functions are registered on the module:
///  - `export_to_arrow`: takes a Velox vector and returns the corresponding
///    pyarrow array.
///  - `import_from_arrow`: takes a pyarrow array and returns the corresponding
///    Velox vector.
///
/// @param m Module to add bindings to.
/// @param asModuleLocalDefinitions If true then these bindings are only
/// visible inside the module. Refer to
/// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#module-local-class-bindings
/// for further details.
/// NOTE(review): the definition in conversion.cpp does not read this flag; it
/// appears to be kept for signature parity with the other add*Bindings
/// functions - confirm.
void addConversionBindings(py::module& m, bool asModuleLocalDefinitions = true);

} // namespace facebook::velox::py
2 changes: 2 additions & 0 deletions pyvelox/pyvelox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "pyvelox.h"
#include "conversion.h"
#include "serde.h"
#include "signatures.h"

Expand Down Expand Up @@ -294,6 +295,7 @@ PYBIND11_MODULE(pyvelox, m) {
addVeloxBindings(m);
addSignatureBindings(m);
addSerdeBindings(m);
addConversionBindings(m);
m.attr("__version__") = "dev";
}
#endif
Expand Down
52 changes: 50 additions & 2 deletions pyvelox/test/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import pyarrow as pa
import pyvelox.pyvelox as pv
import unittest


class TestVeloxVector(unittest.TestCase):
Expand Down Expand Up @@ -273,3 +273,51 @@ def test_slice(self):

with self.assertRaises(NotImplementedError):
e = a[3:8:3]

def test_export_to_arrow(self):
    """Exporting a Velox vector yields a pyarrow array of the expected
    type, length and contents."""
    int_case = ([1, 2, 3], pa.int64())
    float_case = ([1.1, 2.2, 3.3], pa.float64())
    string_case = (["ab", "bc", "ca"], pa.string())

    for values, arrow_type in (int_case, float_case, string_case):
        with self.subTest(data=values):
            exported = pv.export_to_arrow(pv.from_list(values))

            # Type first, then size, then element-by-element contents.
            self.assertEqual(exported.type, arrow_type)
            self.assertEqual(len(exported), len(values))
            self.assertListEqual(exported.tolist(), values)

def test_import_from_arrow(self):
    """Importing a pyarrow array yields a Velox vector of the expected
    size, type and contents."""
    test_cases = [
        # pa.int64() is a 64-bit integer, so the matching Velox type is the
        # 64-bit BIGINT rather than the 32-bit INTEGER.
        ([11, 26, 31], pa.int64(), pv.BigintType()),
        ([0.1, 2.5, 3.9], pa.float64(), pv.DoubleType()),
        (["az", "by", "cx"], pa.string(), pv.VarcharType()),
    ]
    for data, dtype, expected_type in test_cases:
        with self.subTest(data=data):
            array = pa.array(data, type=dtype)
            velox_vector = pv.import_from_arrow(array)

            self.assertEqual(velox_vector.size(), len(data))
            # assertTrue(x, y) only checks the truthiness of x and uses y as
            # the failure message, so it never compared the types. Compare
            # them explicitly instead.
            # NOTE(review): string forms are compared on the assumption that
            # str() of a Velox type is its type name; switch to comparing
            # the type objects directly if the binding defines == for them
            # in every build configuration.
            self.assertEqual(str(velox_vector.dtype()), str(expected_type))
            for i, expected_value in enumerate(data):
                self.assertEqual(velox_vector[i], expected_value)

def test_roundtrip_conversion(self):
    """A Velox vector exported to Arrow and imported back keeps its size,
    type and contents."""
    test_cases = [
        # NOTE(review): vectors built from Python ints are expected to be
        # 64-bit (BIGINT), matching the pa.int64() type that
        # test_export_to_arrow asserts for the same kind of input - confirm.
        ([41, 92, 13], pv.BigintType()),
        ([17.19, 22.25, 13.3], pv.DoubleType()),
        (["aa1", "bb2", "cc3"], pv.VarcharType()),
    ]
    for data, expected_type in test_cases:
        with self.subTest(data=data):
            vector = pv.from_list(data)
            array = pv.export_to_arrow(vector)

            velox_vector = pv.import_from_arrow(array)
            self.assertEqual(velox_vector.size(), len(data))
            # assertTrue(x, y) only checks the truthiness of x and uses y as
            # the failure message, so it never compared the types. Compare
            # them explicitly instead.
            # NOTE(review): string forms are compared on the assumption that
            # str() of a Velox type is its type name; switch to comparing
            # the type objects directly if the binding defines == for them
            # in every build configuration.
            self.assertEqual(str(velox_vector.dtype()), str(expected_type))
            for i, expected_value in enumerate(data):
                self.assertEqual(velox_vector[i], expected_value)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ def build_extension(self, ext):
"typing",
"tabulate",
"typing-inspect",
"pyarrow",
],
extras_require={"tests": ["pyarrow"]},
python_requires=">=3.7",
classifiers=[
"Intended Audience :: Developers",
Expand Down

0 comments on commit 8e95993

Please sign in to comment.