diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 77326ce38d754..102a8a1853f3e 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -20,6 +20,7 @@ install(FILES api.h array.h builder.h + field.h type.h DESTINATION include/arrow) diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt index b51258ffd8b0d..68bf3148a9889 100644 --- a/cpp/src/arrow/table/CMakeLists.txt +++ b/cpp/src/arrow/table/CMakeLists.txt @@ -36,6 +36,9 @@ SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX) # Headers: top level install(FILES + column.h + schema.h + table.h DESTINATION include/arrow/table) ADD_ARROW_TEST(column-test) diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000000000..80103a1a52942 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,37 @@ +thirdparty/ +CMakeFiles/ +CMakeCache.txt +CTestTestfile.cmake +Makefile +cmake_install.cmake +build/ +Testing/ + +# Python stuff + +# Editor temporary/working/backup files +*flymake* + +# Compiled source +*.a +*.dll +*.o +*.py[ocd] +*.so +.build_cache_dir +MANIFEST + +# Generated sources +*.c +*.cpp +# Python files + +# setup.py working directory +build +# setup.py dist directory +dist +# Egg metadata +*.egg-info +# coverage +.coverage +coverage.xml diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 0000000000000..df55bfac9eb4a --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,464 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party
+# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt
+
+cmake_minimum_required(VERSION 2.7)
+project(pyarrow)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake_modules")
+
+# Also search the common cmake modules from Arrow C++ (after our own)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
+
+include(CMakeParseArguments)
+
+set(BUILD_SUPPORT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../cpp/build-support")
+
+# Let "make install" run without depending on all targets.
+#
+# Must be set in the top-level CMakeLists.txt.
+set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)
+
+set(CMAKE_MACOSX_RPATH 1)
+set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
+
+# Generate a Clang compile_commands.json "compilation database" file for use
+# with various development tools, such as Vim's YouCompleteMe plugin.
+# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
+if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
+  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+endif()
+
+############################################################
+# Compiler flags
+############################################################
+
+# flags shared by every build type (whatever CMAKE_CXX_FLAGS already holds is kept)
+set(CXX_COMMON_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall")
+
+# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .')
+# For all builds:
+# For CMAKE_BUILD_TYPE=Debug
+#   -ggdb: Enable gdb debugging
+# For CMAKE_BUILD_TYPE=FastDebug
+#   Same as DEBUG, except with some optimizations on.
+# For CMAKE_BUILD_TYPE=Release
+#   -O3: Enable all compiler optimizations
+#   -g: Enable symbols for profiler tools (TODO: remove for shipping)
+#   -DNDEBUG: Turn off dchecks/asserts/debug only code.
+set(CXX_FLAGS_DEBUG "-ggdb -O0")
+set(CXX_FLAGS_FASTDEBUG "-ggdb -O1")
+set(CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")
+
+# if no build type is specified, default to debug builds
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif()
+
+string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE)
+
+# Set compile flags based on the build type.
+message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})")
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_DEBUG})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_FASTDEBUG})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_RELEASE})
+else()
+  message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
+endif ()
+
+# Prepend the common flags to the build-type flags chosen above
+set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
+
+# Determine compiler version (CompilerInfo sets COMPILER_FAMILY / COMPILER_VERSION)
+include(CompilerInfo)
+
+if ("${COMPILER_FAMILY}" STREQUAL "clang")
+  # Using Clang with ccache causes a bunch of spurious warnings that are
+  # purportedly fixed in the next version of ccache. See the following for details:
+  #
+  #   http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
+  #   http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
+endif()
+
+set(PYARROW_LINK "a")  # "a" = not decided yet, "s" = static, "d" = dynamic
+
+# For any C code, use the same flags.
+set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
+
+# Code coverage
+if ("${PYARROW_GENERATE_COVERAGE}")
+  if("${CMAKE_CXX_COMPILER}" MATCHES ".*clang.*")
+    # There appears to be some bugs in clang 3.3 which cause code coverage
+    # to have link errors, not locating the llvm_gcda_* symbols.
+    # This should be fixed in llvm 3.4 with http://llvm.org/viewvc/llvm-project?view=revision&revision=184666
+    message(SEND_ERROR "Cannot currently generate coverage with clang")
+  endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -DCOVERAGE_BUILD")
+
+  # For coverage to work properly, we need to use static linkage. Otherwise,
+  # __gcov_flush() doesn't properly flush coverage from every module.
+  # See http://stackoverflow.com/questions/28164543/using-gcov-flush-within-a-library-doesnt-force-the-other-modules-to-yield-gc
+  if("${PYARROW_LINK}" STREQUAL "a")
+    message("Using static linking for coverage build")
+    set(PYARROW_LINK "s")
+  elseif("${PYARROW_LINK}" STREQUAL "d")
+    message(SEND_ERROR "Cannot use coverage with dynamic linking")
+  endif()
+endif()
+
+# If we still don't know what kind of linking to perform, choose based on
+# build type (developers like fast builds).
+if ("${PYARROW_LINK}" STREQUAL "a")
+  if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG" OR
+      "${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG")
+    message("Using dynamic linking for ${CMAKE_BUILD_TYPE} builds")
+    set(PYARROW_LINK "d")
+  else()
+    message("Using static linking for ${CMAKE_BUILD_TYPE} builds")
+    set(PYARROW_LINK "s")
+  endif()
+endif()
+
+# Are we using the gold linker? It doesn't work with dynamic linking as
+# weak symbols aren't properly overridden, causing tcmalloc to be omitted.
+# Let's flag this as an error in RELEASE builds (we shouldn't release a
+# product like this).
+#
+# See https://sourceware.org/bugzilla/show_bug.cgi?id=16979 for details.
+#
+# The gold linker is only for ELF binaries, which OSX doesn't use. We can
+# just skip.
+if (NOT APPLE)
+  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Wl,--version OUTPUT_VARIABLE LINKER_OUTPUT)
+endif ()
+if (LINKER_OUTPUT MATCHES "gold")
+  if ("${PYARROW_LINK}" STREQUAL "d" AND
+      "${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
+    message(SEND_ERROR "Cannot use gold with dynamic linking in a RELEASE build "
+      "as it would cause tcmalloc symbols to get dropped")
+  else()
+    message("Using gold linker")
+  endif()
+  set(PYARROW_USING_GOLD 1)
+else()
+  message("Using ld linker")
+endif()
+
+# Having set PYARROW_LINK from the build type and/or the coverage setting, it's
+# now safe to act on its value.
+if ("${PYARROW_LINK}" STREQUAL "d")
+  set(BUILD_SHARED_LIBS ON)
+
+  # Position independent code is only necessary when producing shared objects.
+  add_definitions(-fPIC)
+endif()
+
+# set compile output directory
+string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
+
+# If build in-source, create the latest symlink. If build out-of-source, which is
+# preferred, simply output the binaries in the build folder
+if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")  # quoted: paths may hold ';' or be empty
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}/")
+  # Link build/latest to the current build directory, to avoid developers
+  # accidentally running the latest debug build when in fact they're building
+  # release builds.
+  file(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
+  if (NOT APPLE)
+    set(MORE_ARGS "-T")
+  endif()
+  execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
+    ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
+else()
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
+  # set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
+endif()
+
+# where to put generated archives (.a files)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated libraries (.so files)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated binaries
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+## Python and libraries
+find_package(PythonLibsNew REQUIRED)
+include(UseCython)
+
+include_directories(SYSTEM
+  src)
+
+############################################################
+# Testing
+############################################################
+
+# Add a new test case, with or without an executable that should be built.
+#
+# REL_TEST_NAME is the name of the test. It may be a single component
+# (e.g. monotime-test) or contain additional components (e.g.
+# net/net_util-test).
Either way, the last component must be a globally
+# unique name.
+#
+# Arguments after the test name will be passed to set_tests_properties().
+function(ADD_PYARROW_TEST REL_TEST_NAME)
+  if(NO_TESTS)  # tests disabled: register nothing
+    return()
+  endif()
+  get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
+
+  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc")
+    # This test has a corresponding .cc file, set it up as an executable.
+    set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
+    add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
+    target_link_libraries(${TEST_NAME} ${PYARROW_TEST_LINK_LIBS})
+  else()
+    # No executable, just invoke the test (probably a script) directly.
+    set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
+  endif()
+
+  add_test(${TEST_NAME}
+    ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
+  if(ARGN)
+    set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
+  endif()
+endfunction()
+
+# A wrapper for add_dependencies() that is compatible with NO_TESTS.
+function(ADD_PYARROW_TEST_DEPENDENCIES REL_TEST_NAME)
+  if(NO_TESTS)
+    return()
+  endif()
+  get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
+
+  add_dependencies(${TEST_NAME} ${ARGN})
+endfunction()
+
+enable_testing()
+
+############################################################
+# Dependencies
+############################################################
+function(ADD_THIRDPARTY_LIB LIB_NAME)  # usage: ADD_THIRDPARTY_LIB(name [STATIC_LIB path] [SHARED_LIB path] [DEPS libs...])
+  set(options)
+  set(one_value_args SHARED_LIB STATIC_LIB)
+  set(multi_value_args DEPS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  # Import the static archive when linking statically, or when it is all we were given
+  if(("${PYARROW_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB))
+    if(NOT ARG_STATIC_LIB)
+      message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
+    endif()
+    add_library(${LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+  else()
+    add_library(${LIB_NAME} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  endif()
+
+  if(ARG_DEPS)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+
+  # Set up an "exported variant" of this thirdparty library: a second imported
+  # target for the same library, named with an "_exported" suffix.
+  # We prefer the static archive if it exists (as it's akin to an "internal"
+  # library), but we'll settle for the shared object if we must.
+  #
+  # A shared object exported variant will force any "leaf" library that
+  # transitively depends on it to also depend on it at runtime; this is
+  # desirable for some libraries (e.g. cyrus_sasl).
+  set(LIB_NAME_EXPORTED ${LIB_NAME}_exported)
+  if(ARG_STATIC_LIB)
+    add_library(${LIB_NAME_EXPORTED} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+  else()
+    add_library(${LIB_NAME_EXPORTED} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+  endif()
+  if(ARG_DEPS)
+    set_target_properties(${LIB_NAME_EXPORTED}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+endfunction()
+
+## GMock
+find_package(GTest REQUIRED)
+include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(gtest
+  STATIC_LIB ${GTEST_STATIC_LIB})
+
+## Arrow
+find_package(Arrow REQUIRED)
+include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(arrow
+  SHARED_LIB ${ARROW_SHARED_LIB})
+
+############################################################
+# Linker setup
+############################################################
+
+# Libraries every pyarrow test executable links against: pyarrow_test_main,
+# pyarrow, plus whatever PYARROW_BASE_LIBS holds when this line runs.
+# NOTE(review): PYARROW_BASE_LIBS does not appear to be set before this point.
+
+set(PYARROW_MIN_TEST_LIBS
+  pyarrow_test_main
+  pyarrow
+  ${PYARROW_BASE_LIBS})
+
+set(PYARROW_TEST_LINK_LIBS ${PYARROW_MIN_TEST_LIBS})
+
+############################################################
+# "make ctags" target
+############################################################
+if (UNIX)
+  add_custom_target(ctags ctags -R --languages=c++,c --exclude=thirdparty/installed)
+endif()
+
+############################################################
+# "make etags" target
+############################################################
+if (UNIX)
+  add_custom_target(tags etags --members --declarations
+  `find ${CMAKE_CURRENT_SOURCE_DIR}/src
+   -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or -name \\*.h -or -name \\*.c -or
+   -name \\*.f`)
+  add_custom_target(etags DEPENDS tags)
+endif()
+
+############################################################
+# "make cscope" target
+############################################################
+if (UNIX)
+  add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR}
+  ( -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or
+    -name \\*.h -or -name \\*.c -or -name \\*.f )
+  -exec echo \"{}\" \; > cscope.files && cscope -q -b VERBATIM)
+endif()
+
+############################################################
+# "make lint" target
+############################################################
+if (UNIX)
+  # Full lint
+  add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
+  --verbose=2
+  --filter=-whitespace/comments,-readability/todo,-build/header_guard
+    `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
+endif()
+
+############################################################
+# Subdirectories
+############################################################
+
+add_subdirectory(src/pyarrow)
+add_subdirectory(src/pyarrow/util)
+
+set(PYARROW_SRCS
+  src/pyarrow/init.cc
+)
+
+set(LINK_LIBS
+  pyarrow_util
+  arrow
+)
+
+add_library(pyarrow SHARED
+  ${PYARROW_SRCS})
+target_link_libraries(pyarrow ${LINK_LIBS})
+set_target_properties(pyarrow PROPERTIES LINKER_LANGUAGE CXX)
+
+if(APPLE)
+  set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+endif()
+
+############################################################
+# Setup and build Cython modules
+############################################################
+
+foreach(pyx_api_file
+    arrow/config.pyx
+    arrow/parquet.pyx)
+  set_source_files_properties(${pyx_api_file} PROPERTIES CYTHON_API 1)
+endforeach()
+
+set(USE_RELATIVE_RPATH ON)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+
+set(CYTHON_EXTENSIONS
+  config
+  parquet
+)
+
+foreach(module ${CYTHON_EXTENSIONS})
+  string(REPLACE "." ";" directories ${module})  # dotted module path -> list of components
+  list(GET directories -1 module_name)  # last component is the module itself
+  list(REMOVE_AT directories -1)  # what remains are its parent packages
+
+  string(REPLACE "." "/" module_root "${module}")
+  set(module_SRC arrow/${module_root}.pyx)
+  set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX 1)
+
+  cython_add_module(${module_name}
+    ${module_name}_pyx
+    ${module_name}_output
+    ${module_SRC})
+
+  if (directories)
+    string(REPLACE ";" "/" module_output_directory "${directories}")  # quoted: unquoted, a;b is passed as two inputs and joined as "ab"
+    set_target_properties(${module_name} PROPERTIES
+      LIBRARY_OUTPUT_DIRECTORY ${module_output_directory})
+  endif()
+
+  if(APPLE)
+    set(module_install_rpath "@loader_path")
+  else()
+    set(module_install_rpath "$ORIGIN")
+  endif()
+  list(LENGTH directories i)
+  while(${i} GREATER 0)  # one ".." per parent package
+    set(module_install_rpath "${module_install_rpath}/..")
+    math(EXPR i "${i} - 1" )
+  endwhile()
+
+  # for inplace development for now (this overrides the relative rpath built above)
+  set(module_install_rpath "${CMAKE_SOURCE_DIR}/arrow/")
+
+  set_target_properties(${module_name} PROPERTIES
+    INSTALL_RPATH ${module_install_rpath})
+  target_link_libraries(${module_name} pyarrow)
+endforeach()
diff --git a/python/LICENSE.txt b/python/LICENSE.txt
new file mode 100644
index 0000000000000..078e144ded1c1
--- /dev/null
+++ b/python/LICENSE.txt
@@ -0,0 +1,88 @@
+## 3rd-party
licenses for code that has been adapted for the Arrow Python + library + +------------------------------------------------------------------------------- +Some code from pandas has been adapted for this codebase. pandas is available +under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for Arrow. Ibis is released under the Apache License, Version 2.0. 
diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000000000..c79fa9786f476 --- /dev/null +++ b/python/README.md @@ -0,0 +1,14 @@ +## Python library for Apache Arrow + +This library provides a Pythonic API wrapper for the reference Arrow C++ +implementation, along with tools for interoperability with pandas, NumPy, and +other traditional Python scientific computing packages. + +#### Development details + +This project is layered in two pieces: + +* pyarrow, a C++ library for easier interoperability between Arrow C++, NumPy, + and pandas +* Cython extensions and pure Python code under arrow/ which expose Arrow C++ + and pyarrow to pure Python users \ No newline at end of file diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/python/arrow/compat.py b/python/arrow/compat.py new file mode 100644 index 0000000000000..2ac41ac8abf89 --- /dev/null +++ b/python/arrow/compat.py @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# flake8: noqa
+
+import itertools
+
+import numpy as np
+
+import sys
+import six
+from six import BytesIO, StringIO, string_types as py_string
+
+
+PY26 = sys.version_info[:2] == (2, 6)  # True only on Python 2.6
+PY2 = sys.version_info[0] == 2  # True on any Python 2.x
+
+
+if PY26:
+    import unittest2 as unittest
+else:
+    import unittest
+
+
+if PY2:
+    import cPickle  # NOTE(review): not referenced elsewhere in this module
+
+    try:
+        from cdecimal import Decimal
+    except ImportError:
+        from decimal import Decimal
+
+    unicode_type = unicode
+    lzip = zip  # must bind the builtin before `zip` is rebound on the next line
+    zip = itertools.izip
+
+    def dict_values(x):
+        return x.values()
+
+    range = xrange
+    long = long
+
+    def tobytes(o):  # encode unicode as UTF-8; anything else is returned unchanged
+        if isinstance(o, unicode):
+            return o.encode('utf8')
+        else:
+            return o
+
+    def frombytes(o):  # returns its argument unchanged on Python 2
+        return o
+else:
+    unicode_type = str
+    def lzip(*x):  # zip() materialized as a list
+        return list(zip(*x))
+    long = int
+    zip = zip
+    def dict_values(x):  # dict values materialized as a list
+        return list(x.values())
+    from decimal import Decimal
+    range = range
+
+    def tobytes(o):  # encode str as UTF-8; anything else is returned unchanged
+        if isinstance(o, str):
+            return o.encode('utf8')
+        else:
+            return o
+
+    def frombytes(o):  # decodes as UTF-8; assumes o has a .decode method (e.g. bytes)
+        return o.decode('utf8')
+
+
+integer_types = six.integer_types + (np.integer,)  # six's integer types plus NumPy integer scalars
diff --git a/python/arrow/config.pyx b/python/arrow/config.pyx
new file mode 100644
index 0000000000000..8f10beb3a2e72
--- /dev/null
+++ b/python/arrow/config.pyx
@@ -0,0 +1,8 @@
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+cdef extern from 'pyarrow/init.h' namespace 'arrow::py':
+    void pyarrow_init()
+
+pyarrow_init()  # module-level call: runs when this extension module is imported
diff --git a/python/arrow/includes/__init__.pxd b/python/arrow/includes/__init__.pxd
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd
new file mode 100644
index 0000000000000..3635ceb868596
--- /dev/null
+++ b/python/arrow/includes/arrow.pxd
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from arrow.includes.common cimport *
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    pass  # placeholder: no declarations from arrow/api.h are exposed here yet
diff --git a/python/arrow/includes/common.pxd b/python/arrow/includes/common.pxd
new file mode 100644
index 0000000000000..f2fc826625e45
--- /dev/null
+++ b/python/arrow/includes/common.pxd
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from libc.stdint cimport *
+from libcpp cimport bool as c_bool
+from libcpp.string cimport string
+from libcpp.vector cimport vector
+
+# This must be included for cerr and other things to work
+cdef extern from "<iostream>":
+    pass
+
+cdef extern from "<memory>" namespace "std" nogil:
+
+    cdef cppclass shared_ptr[T]:  # only the members declared below are visible to Cython
+        T* get()
+        void reset()
+        void reset(T* p)
diff --git a/python/arrow/includes/parquet.pxd b/python/arrow/includes/parquet.pxd
new file mode 100644
index 0000000000000..62342f3066969
--- /dev/null
+++ b/python/arrow/includes/parquet.pxd
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from arrow.includes.common cimport *
+
+cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
+    cdef cppclass ColumnReader:  # base of the typed readers below; no members declared yet
+        pass
+
+    cdef cppclass BoolReader(ColumnReader):
+        pass
+
+    cdef cppclass Int32Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int64Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int96Reader(ColumnReader):
+        pass
+
+    cdef cppclass FloatReader(ColumnReader):
+        pass
+
+    cdef cppclass DoubleReader(ColumnReader):
+        pass
+
+    cdef cppclass ByteArrayReader(ColumnReader):
+        pass
+
+    cdef cppclass RowGroupReader:  # declared as an opaque type; no members yet
+        pass
+
+    cdef cppclass ParquetFileReader:  # declared as an opaque type; no members yet
+        pass
diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd
new file mode 100644
index 0000000000000..dcef663f3894d
--- /dev/null
+++ b/python/arrow/includes/pyarrow.pxd
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from arrow.includes.common cimport *
+
+cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
+    pass  # NOTE(review): config.pyx declares pyarrow/init.h under namespace 'arrow::py' - confirm "pyarrow" is intended here
diff --git a/python/arrow/parquet.pyx b/python/arrow/parquet.pyx
new file mode 100644
index 0000000000000..23c3838bcad1f
--- /dev/null
+++ b/python/arrow/parquet.pyx
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from arrow.compat import frombytes, tobytes
+from arrow.includes.parquet cimport *
diff --git a/python/arrow/tests/__init__.py b/python/arrow/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/python/cmake_modules/CompilerInfo.cmake b/python/cmake_modules/CompilerInfo.cmake
new file mode 100644
index 0000000000000..e66bc2693eead
--- /dev/null
+++ b/python/cmake_modules/CompilerInfo.cmake
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Sets COMPILER_FAMILY to 'clang' or 'gcc' (any other compiler is a fatal error)
+# Sets COMPILER_VERSION to the version (left unset for Apple clang-700.x)
+execute_process(COMMAND "${CMAKE_CXX_COMPILER}" -v
+                ERROR_VARIABLE COMPILER_VERSION_FULL)
+message(STATUS "${COMPILER_VERSION_FULL}")
+
+# clang on Linux and Mac OS X before 10.9
+if("${COMPILER_VERSION_FULL}" MATCHES ".*clang version.*")
+  set(COMPILER_FAMILY "clang")
+  string(REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1"
+    COMPILER_VERSION "${COMPILER_VERSION_FULL}")
+# clang on Mac OS X 10.9 and later
+elseif("${COMPILER_VERSION_FULL}" MATCHES ".*based on LLVM.*")
+  set(COMPILER_FAMILY "clang")
+  string(REGEX REPLACE ".*based on LLVM ([0-9]+\\.[0-9]+).*" "\\1"
+    COMPILER_VERSION "${COMPILER_VERSION_FULL}")
+
+# clang on Mac OS X, XCode 7. No version replacement is done
+# because Apple no longer advertises the upstream LLVM version.
+elseif("${COMPILER_VERSION_FULL}" MATCHES "clang-700\\..*")
+  set(COMPILER_FAMILY "clang")
+
+# gcc
+elseif("${COMPILER_VERSION_FULL}" MATCHES ".*gcc version.*")
+  set(COMPILER_FAMILY "gcc")
+  string(REGEX REPLACE ".*gcc version ([0-9\\.]+).*" "\\1"
+    COMPILER_VERSION "${COMPILER_VERSION_FULL}")
+else()
+  message(FATAL_ERROR "Unknown compiler. Version info:\n${COMPILER_VERSION_FULL}")
+endif()
+message("Selected compiler ${COMPILER_FAMILY} ${COMPILER_VERSION}")
diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake
new file mode 100644
index 0000000000000..3d9983849ebb2
--- /dev/null
+++ b/python/cmake_modules/FindArrow.cmake
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# - Find ARROW (arrow/array.h, libarrow.a, libarrow.so)
+# Headers and libraries are searched only under $ENV{ARROW_HOME}.
+# This module defines
+#  ARROW_INCLUDE_DIR, directory containing headers
+#  ARROW_LIBS, directory containing arrow libraries
+#  ARROW_STATIC_LIB, path to libarrow.a
+#  ARROW_SHARED_LIB, path to libarrow's shared library
+#  ARROW_FOUND, whether arrow has been found
+
+set(ARROW_SEARCH_HEADER_PATHS
+  $ENV{ARROW_HOME}/include
+)
+
+set(ARROW_SEARCH_LIB_PATH
+  $ENV{ARROW_HOME}/lib
+)
+
+find_path(ARROW_INCLUDE_DIR arrow/array.h PATHS
+  ${ARROW_SEARCH_HEADER_PATHS}
+  # make sure we don't accidentally pick up a different version
+  NO_DEFAULT_PATH
+)
+
+find_library(ARROW_LIB_PATH NAMES arrow
+  PATHS
+  ${ARROW_SEARCH_LIB_PATH}
+  NO_DEFAULT_PATH)
+
+if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH)
+  set(ARROW_FOUND TRUE)
+  set(ARROW_LIB_NAME libarrow)
+  # Use the directory of the library that was actually located, so the
+  # reported paths stay correct when ARROW_LIB_PATH is preset in the cache
+  # or the search path holds more than one directory.
+  get_filename_component(ARROW_LIBS "${ARROW_LIB_PATH}" PATH)
+  set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.a)
+  set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(ARROW_FOUND FALSE)
+endif ()
+
+if (ARROW_FOUND)
+  if (NOT Arrow_FIND_QUIETLY)
+    message(STATUS "Found the Arrow library: ${ARROW_LIB_PATH}")
+  endif ()
+else ()
+  if (NOT Arrow_FIND_QUIETLY)
+    set(ARROW_ERR_MSG "Could not find the Arrow library. Looked for headers")
+    set(ARROW_ERR_MSG "${ARROW_ERR_MSG} in ${ARROW_SEARCH_HEADER_PATHS}, and for libs")
+    set(ARROW_ERR_MSG "${ARROW_ERR_MSG} in ${ARROW_SEARCH_LIB_PATH}")
+    # Only a REQUIRED lookup aborts the configure step; otherwise just report.
+    if (Arrow_FIND_REQUIRED)
+      message(FATAL_ERROR "${ARROW_ERR_MSG}")
+    else ()
+      message(STATUS "${ARROW_ERR_MSG}")
+    endif ()
+  endif ()
+endif ()
+
+mark_as_advanced(
+  ARROW_INCLUDE_DIR
+  ARROW_LIBS
+  ARROW_STATIC_LIB
+  ARROW_SHARED_LIB
+)
diff --git a/python/cmake_modules/FindCython.cmake b/python/cmake_modules/FindCython.cmake
new file mode 100644
index 0000000000000..9df3b5d59d274
--- /dev/null
+++ b/python/cmake_modules/FindCython.cmake
@@ -0,0 +1,30 @@
+# Find the Cython compiler.
+#
+# This code sets the following variables:
+#
+#  CYTHON_EXECUTABLE
+#
+# See also UseCython.cmake
+
+#=============================================================================
+# Copyright 2011 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#=============================================================================
+# Locate the compiler under either of its two launcher names.
+find_program( CYTHON_EXECUTABLE NAMES cython cython.bat )
+# Standard found / not-found handling, keyed on CYTHON_EXECUTABLE.
+include( FindPackageHandleStandardArgs )
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
+
+mark_as_advanced( CYTHON_EXECUTABLE )
diff --git a/python/cmake_modules/FindNumPy.cmake b/python/cmake_modules/FindNumPy.cmake
new file mode 100644
index 0000000000000..58bb531f5324a
--- /dev/null
+++ b/python/cmake_modules/FindNumPy.cmake
@@ -0,0 +1,100 @@
+# - Find the NumPy libraries
+# This module finds if NumPy is installed, and sets the following variables
+# indicating where it is.
+#
+# TODO: Update to provide the libraries and paths for linking npymath lib.
+#
+# NUMPY_FOUND - was NumPy found
+# NUMPY_VERSION - the version of NumPy found as a string
+# NUMPY_VERSION_MAJOR - the major version number of NumPy
+# NUMPY_VERSION_MINOR - the minor version number of NumPy
+# NUMPY_VERSION_PATCH - the patch version number of NumPy
+# NUMPY_VERSION_DECIMAL - e.g.
version 1.6.1 is 10601 +# NUMPY_INCLUDE_DIRS - path to the NumPy include files + +#============================================================================ +# Copyright 2012 Continuum Analytics, Inc. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+#
+#============================================================================
+
+# Finding NumPy involves calling the Python interpreter
+if(NumPy_FIND_REQUIRED)
+  find_package(PythonInterp REQUIRED)
+else()
+  find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+  set(NUMPY_FOUND FALSE)
+  return()
+endif()
+
+# Ask the interpreter for NumPy's version and include directory, one per line.
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+  "import numpy as n; print(n.__version__); print(n.get_include());"
+  RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
+  OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT
+  ERROR_VARIABLE _NUMPY_ERROR_VALUE
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# Compare the exit status numerically: a regex match on "0" would also accept
+# statuses such as 10, and a non-numeric error string never equals 0.
+if(NOT _NUMPY_SEARCH_SUCCESS EQUAL 0)
+  if(NumPy_FIND_REQUIRED)
+    message(FATAL_ERROR
+      "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
+  endif()
+  set(NUMPY_FOUND FALSE)
+  return()
+endif()
+
+# Convert the process output into a list. The inputs are quoted so that a
+# literal ';' in the output reaches the escaping step instead of being split
+# into separate arguments first.
+string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES "${_NUMPY_VALUES_OUTPUT}")
+string(REGEX REPLACE "\n" ";" _NUMPY_VALUES "${_NUMPY_VALUES}")
+list(GET _NUMPY_VALUES 0 NUMPY_VERSION)
+list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS)
+
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}")
+if("${_VER_CHECK}" STREQUAL "")
+  # The output from Python was unexpected. Raise an error always
+  # here, because we found NumPy, but it appears to be corrupted somehow.
+  message(FATAL_ERROR
+    "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n")
+  return()
+endif()
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS "${NUMPY_INCLUDE_DIRS}")
+
+# Get the major, minor and patch version numbers
+string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST "${NUMPY_VERSION}")
+list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR)
+list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR)
+list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH)
+# Keep only the leading digits of the patch component.
+string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH "${NUMPY_VERSION_PATCH}")
+math(EXPR NUMPY_VERSION_DECIMAL
+  "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
+
+find_package_message(NUMPY
+  "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}"
+  "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}")
+
+set(NUMPY_FOUND TRUE)
diff --git a/python/cmake_modules/FindPythonLibsNew.cmake b/python/cmake_modules/FindPythonLibsNew.cmake
new file mode 100644
index 0000000000000..c70e6bc26a719
--- /dev/null
+++ b/python/cmake_modules/FindPythonLibsNew.cmake
@@ -0,0 +1,236 @@
+# - Find python libraries
+# This module finds the libraries corresponding to the Python interpreter
+# FindPythonInterp provides.
+# This code sets the following variables:
+#
+# PYTHONLIBS_FOUND - have the Python libs been found
+# PYTHON_PREFIX - path to the Python installation
+# PYTHON_LIBRARIES - path to the python library
+# PYTHON_INCLUDE_DIRS - path to where Python.h is found
+# PYTHON_SITE_PACKAGES - path to installation site-packages
+# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
+#
+# PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated)
+#
+# A function PYTHON_ADD_MODULE( src1 src2 ... srcN) is defined
+# to build modules for python.
+#
+# Thanks to talljimbo for the patch adding the 'LDVERSION' config
+# variable usage.
+
+#=============================================================================
+# Copyright 2001-2009 Kitware, Inc.
+# Copyright 2012-2014 Continuum Analytics, Inc.
+#
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the names of Kitware, Inc., the Insight Software Consortium, +# nor the names of their contributors may be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +# Use the Python interpreter to find the libs. 
+if(PythonLibsNew_FIND_REQUIRED)
+  find_package(PythonInterp REQUIRED)
+else()
+  find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+  set(PYTHONLIBS_FOUND FALSE)
+  return()
+endif()
+
+# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
+# testing whether sys has the gettotalrefcount function is a reliable,
+# cross-platform way to detect a CPython debug interpreter.
+#
+# The library suffix is from the config var LDVERSION sometimes, otherwise
+# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
+#
+# The config var LIBPL is for Linux, and helps on Debian Jessie where the
+# addition of multi-arch support shuffled things around.
+#
+# The script prints nine values, one per line, in the order they are read
+# back with list(GET ...) below.
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+  "from distutils import sysconfig as s;import sys;import struct;
+print('.'.join(str(v) for v in sys.version_info));
+print(sys.prefix);
+print(s.get_python_inc(plat_specific=True));
+print(s.get_python_lib(plat_specific=True));
+print(s.get_config_var('SO'));
+print(hasattr(sys, 'gettotalrefcount')+0);
+print(struct.calcsize('@P'));
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+print(s.get_config_var('LIBPL'));
+"
+  RESULT_VARIABLE _PYTHON_SUCCESS
+  OUTPUT_VARIABLE _PYTHON_VALUES
+  ERROR_VARIABLE _PYTHON_ERROR_VALUE
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# Compare the exit status numerically: a regex match on "0" would also accept
+# statuses such as 10, and a non-numeric error string never equals 0.
+if(NOT _PYTHON_SUCCESS EQUAL 0)
+  if(PythonLibsNew_FIND_REQUIRED)
+    message(FATAL_ERROR
+      "Python config failure:\n${_PYTHON_ERROR_VALUE}")
+  endif()
+  set(PYTHONLIBS_FOUND FALSE)
+  return()
+endif()
+
+# Convert the process output into a list. The inputs are quoted so that a
+# literal ';' in the output reaches the escaping step instead of being split
+# into separate arguments first.
+string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES "${_PYTHON_VALUES}")
+string(REGEX REPLACE "\n" ";" _PYTHON_VALUES "${_PYTHON_VALUES}")
+list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
+list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
+list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
+list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
+list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
+list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
+list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
+list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
+list(GET _PYTHON_VALUES 8 PYTHON_LIBRARY_PATH)
+
+# Make sure the Python has the same pointer-size as the chosen compiler
+# Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined
+if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
+  if(PythonLibsNew_FIND_REQUIRED)
+    math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
+    math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
+    message(FATAL_ERROR
+      "Python config failure: Python is ${_PYTHON_BITS}-bit, "
+      "chosen compiler is ${_CMAKE_BITS}-bit")
+  endif()
+  set(PYTHONLIBS_FOUND FALSE)
+  return()
+endif()
+
+# The built-in FindPython didn't always give the version numbers
+string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST "${_PYTHON_VERSION_LIST}")
+list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
+list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
+list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX "${PYTHON_PREFIX}")
+string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR "${PYTHON_INCLUDE_DIR}")
+string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES "${PYTHON_SITE_PACKAGES}")
+
+if(CMAKE_HOST_WIN32)
+  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+    set(PYTHON_LIBRARY
+      "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+  else()
+    set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/libpython${PYTHON_LIBRARY_SUFFIX}.a")
+  endif()
+elseif(APPLE)
+  # Seems to require "-undefined dynamic_lookup" instead of linking
+  # against the .dylib, otherwise it crashes. This flag is added
+  # below
+  set(PYTHON_LIBRARY "")
+  #set(PYTHON_LIBRARY
+  #  "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib")
+else()
+  # 64-bit interpreters additionally search <prefix>/lib64.
+  if("${PYTHON_SIZEOF_VOID_P}" EQUAL 8)
+    set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+  else()
+    set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+  endif()
+  message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
+  # Probably this needs to be more involved. It would be nice if the config
+  # information the python interpreter itself gave us were more complete.
+  find_library(PYTHON_LIBRARY
+    NAMES "python${PYTHON_LIBRARY_SUFFIX}"
+    PATHS ${_PYTHON_LIBS_SEARCH}
+    NO_DEFAULT_PATH)
+  message(STATUS "Found Python lib ${PYTHON_LIBRARY}")
+endif()
+
+# For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal.
+SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL
+  "Path to where Python.h is found (deprecated)")
+
+MARK_AS_ADVANCED(
+  PYTHON_LIBRARY
+  PYTHON_INCLUDE_DIR
+)
+
+# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
+# cache entries because they are meant to specify the location of a single
+# library. We now set the variables listed by the documentation for this
+# module.
+# NOTE(review): PYTHON_DEBUG_LIBRARY is not assigned anywhere in this module.
+SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
+SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
+
+
+# Don't know how to get to this directory, just doing something simple :P
+#INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
+#FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS)
+find_package_message(PYTHON
+  "Found PythonLibs: ${PYTHON_LIBRARY}"
+  "${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
+
+# Publish the documented result variable. On OS X the library is left empty on
+# purpose (modules use dynamic lookup), so only the other platforms require a
+# usable PYTHON_LIBRARY value.
+if(APPLE OR PYTHON_LIBRARY)
+  set(PYTHONLIBS_FOUND TRUE)
+else()
+  set(PYTHONLIBS_FOUND FALSE)
+endif()
+
+
+# PYTHON_ADD_MODULE( src1 src2 ... srcN) is used to build modules for python.
+function(python_add_module _NAME)
+  # Per-module switches: whether to build the module at all, and whether to
+  # build it as a loadable MODULE (default when shared libraries are
+  # supported) or as a STATIC library.
+  get_property(_TARGET_SUPPORTS_SHARED_LIBS
+    GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
+  option(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE)
+  option(PYTHON_MODULE_${_NAME}_BUILD_SHARED
+    "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS})
+
+  # Keep both switches out of the default cache view
+  mark_as_advanced(PYTHON_ENABLE_MODULE_${_NAME}
+    PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+
+  # Nothing else to do for a module that has been switched off
+  if(NOT PYTHON_ENABLE_MODULE_${_NAME})
+    return()
+  endif()
+
+  if(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+    set(PY_MODULE_TYPE MODULE)
+  else()
+    set(PY_MODULE_TYPE STATIC)
+    set_property(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME})
+  endif()
+
+  set_property(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME})
+  add_library(${_NAME} ${PY_MODULE_TYPE} ${ARGN})
+
+  if(APPLE)
+    # On OS X, linking against the Python libraries causes
+    # segfaults, so do this dynamic lookup instead.
+    set_target_properties(${_NAME} PROPERTIES LINK_FLAGS
+      "-undefined dynamic_lookup")
+  else()
+    target_link_libraries(${_NAME} ${PYTHON_LIBRARIES})
+  endif()
+
+  # Loadable modules take the Python-specific file name prefix and suffix
+  if(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+    set_target_properties(${_NAME} PROPERTIES
+      PREFIX "${PYTHON_MODULE_PREFIX}"
+      SUFFIX "${PYTHON_MODULE_EXTENSION}")
+  endif()
+endfunction()
\ No newline at end of file
diff --git a/python/cmake_modules/UseCython.cmake b/python/cmake_modules/UseCython.cmake
new file mode 100644
index 0000000000000..e7034db52f335
--- /dev/null
+++ b/python/cmake_modules/UseCython.cmake
@@ -0,0 +1,164 @@
+# Define a function to create Cython modules.
+#
+# For more information on the Cython project, see http://cython.org/.
+# "Cython is a language that makes writing C extensions for the Python language
+# as easy as Python itself."
+#
+# This file defines a CMake function to build a Cython Python module.
+# To use it, first include this file.
+#
+#   include( UseCython )
+#
+# Then call cython_add_module to create a module.
+#
+#   cython_add_module( <name> <pyx_target_name> <generated_files> <src1> <src2> ... <srcN> )
+#
+# Where <name> is the desired name of the target for the resulting Python module,
+# <pyx_target_name> is the desired name of the target that runs the Cython compiler
+# to generate the needed C or C++ files, <generated_files> is a variable to hold the
+# files generated by Cython, and <src1> <src2> ... <srcN> are source files
+# to be compiled into the module, e.g. *.pyx, *.c, *.cxx, etc.
+# Only one .pyx file may be present for each target
+# (this is an inherent limitation of Cython).
+#
+# The sample paths set with the CMake include_directories() command will be used
+# for include directories to search for *.pxd when running the Cython compiler.
+#
+# Cache variables that affect the behavior include:
+#
+#   CYTHON_ANNOTATE
+#   CYTHON_NO_DOCSTRINGS
+#   CYTHON_FLAGS
+#
+# Source file properties that affect the build process are
+#
+#   CYTHON_IS_CXX
+#   CYTHON_IS_PUBLIC
+#   CYTHON_IS_API
+#
+# If CYTHON_IS_CXX is set on a *.pyx file with the CMake set_source_files_properties()
+# command, the file will be compiled as a C++ file.
+#
+# See also FindCython.cmake
+
+#=============================================================================
+# Copyright 2011 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#=============================================================================
+
+# Configuration options.
+set( CYTHON_ANNOTATE OFF
+  CACHE BOOL "Create an annotated .html file when compiling *.pyx." )
+set( CYTHON_NO_DOCSTRINGS OFF
+  CACHE BOOL "Strip docstrings from the compiled module." )
+set( CYTHON_FLAGS "" CACHE STRING
+  "Extra flags to the cython compiler." )
+mark_as_advanced( CYTHON_ANNOTATE CYTHON_NO_DOCSTRINGS CYTHON_FLAGS )
+
+find_package( Cython REQUIRED )
+find_package( PythonLibsNew REQUIRED )
+
+set( CYTHON_CXX_EXTENSION "cxx" )
+set( CYTHON_C_EXTENSION "c" )
+
+# compile_pyx( <name> <pyx_target_name> <generated_files> <pyx_file> )
+#
+# Adds the custom target <pyx_target_name>, which runs the Cython compiler on
+# <pyx_file> and writes <name>.c, or <name>.cxx when the CYTHON_IS_CXX source
+# file property is set on <pyx_file>. The names of the generated files are
+# stored in the caller's variable named by <generated_files>: the C/C++ source,
+# plus <name>.h for public modules, plus <name>_api.h for API modules.
+function( compile_pyx _name pyx_target_name generated_files pyx_file)
+  # Default to assuming all files are C.
+  set( cxx_arg "" )
+  set( extension ${CYTHON_C_EXTENSION} )
+  set( comment "Compiling Cython C source for ${_name}..." )
+
+  # Start the optional flags empty so that same-named variables in the calling
+  # scope cannot leak onto the Cython command line.
+  set( annotate_arg "" )
+  set( no_docstrings_arg "" )
+  set( cython_debug_arg "" )
+
+  # Determine if it is a C or C++ file.
+  get_source_file_property( property_is_cxx ${pyx_file} CYTHON_IS_CXX )
+  if( ${property_is_cxx} )
+    set( cxx_arg "--cplus" )
+    set( extension ${CYTHON_CXX_EXTENSION} )
+    set( comment "Compiling Cython CXX source for ${_name}..." )
+  endif()
+  get_source_file_property( pyx_location ${pyx_file} LOCATION )
+
+  # Set additional flags.
+  if( CYTHON_ANNOTATE )
+    set( annotate_arg "--annotate" )
+  endif()
+
+  if( CYTHON_NO_DOCSTRINGS )
+    set( no_docstrings_arg "--no-docstrings" )
+  endif()
+
+  if(NOT WIN32)
+    if( "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR
+        "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo" )
+      set( cython_debug_arg "--gdb" )
+    endif()
+  endif()
+
+  # Determining generated file names. The documented property names are
+  # CYTHON_IS_PUBLIC / CYTHON_IS_API; the shorter CYTHON_PUBLIC / CYTHON_API
+  # spellings are still honoured for existing callers.
+  get_source_file_property( property_is_public ${pyx_file} CYTHON_IS_PUBLIC )
+  if( NOT property_is_public )
+    get_source_file_property( property_is_public ${pyx_file} CYTHON_PUBLIC )
+  endif()
+  get_source_file_property( property_is_api ${pyx_file} CYTHON_IS_API )
+  if( NOT property_is_api )
+    get_source_file_property( property_is_api ${pyx_file} CYTHON_API )
+  endif()
+  if( ${property_is_api} )
+    set( _generated_files "${_name}.${extension}" "${_name}.h" "${_name}_api.h")
+  elseif( ${property_is_public} )
+    set( _generated_files "${_name}.${extension}" "${_name}.h")
+  else()
+    set( _generated_files "${_name}.${extension}")
+  endif()
+  set_source_files_properties( ${_generated_files} PROPERTIES GENERATED TRUE )
+  set( ${generated_files} ${_generated_files} PARENT_SCOPE )
+
+  # Add the command to run the compiler.
+  # NOTE(review): include_directory_arg is not assigned in this function, so it
+  # expands to whatever the calling scope defines (normally nothing).
+  add_custom_target(${pyx_target_name}
+    COMMAND ${CYTHON_EXECUTABLE} ${cxx_arg} ${include_directory_arg}
+    ${annotate_arg} ${no_docstrings_arg} ${cython_debug_arg} ${CYTHON_FLAGS}
+    --output-file "${_name}.${extension}" ${pyx_location}
+    DEPENDS ${pyx_location}
+    # do not specify byproducts for now since they don't work with the older
+    # version of cmake available in the apt repositories.
+    #BYPRODUCTS ${_generated_files}
+    COMMENT "${comment}"
+    )
+
+  # Remove their visibility to the user.
+  set( corresponding_pxd_file "" CACHE INTERNAL "" )
+  set( header_location "" CACHE INTERNAL "" )
+  set( pxd_location "" CACHE INTERNAL "" )
+endfunction()
+
+# cython_add_module( src1 src2 ... srcN )
+# Build the Cython Python module.
+function( cython_add_module _name pyx_target_name generated_files)
+  # Arguments after the three named ones are source files. Exactly one of them
+  # is expected to be the *.pyx (or *.py) file handed to Cython; the rest are
+  # compiled into the module as-is. The generated file names are returned in
+  # the caller's variable named by generated_files.
+  set( pyx_module_source "" )
+  set( other_module_sources "" )
+  foreach( _file ${ARGN} )
+    # Quoted so that a file name is never re-evaluated as a variable name.
+    if( "${_file}" MATCHES ".*\\.py[x]?$" )
+      list( APPEND pyx_module_source ${_file} )
+    else()
+      list( APPEND other_module_sources ${_file} )
+    endif()
+  endforeach()
+
+  list( LENGTH pyx_module_source _pyx_count )
+  if( _pyx_count EQUAL 0 )
+    message( FATAL_ERROR
+      "cython_add_module(${_name}): no *.pyx or *.py source file was given" )
+  endif()
+  # compile_pyx takes a single source file; any further ones were never compiled.
+  list( GET pyx_module_source 0 _pyx_file )
+  if( _pyx_count GREATER 1 )
+    message( WARNING
+      "cython_add_module(${_name}): several *.pyx/*.py files given "
+      "(${pyx_module_source}); only ${_pyx_file} is compiled with Cython" )
+  endif()
+
+  compile_pyx( ${_name} ${pyx_target_name} _generated_files ${_pyx_file} )
+  set( ${generated_files} ${_generated_files} PARENT_SCOPE )
+  include_directories( ${PYTHON_INCLUDE_DIRS} )
+  python_add_module( ${_name} ${_generated_files} ${other_module_sources} )
+  # python_add_module creates no target when the module's
+  # PYTHON_ENABLE_MODULE_<name> option is switched off.
+  if( TARGET ${_name} )
+    # Generate the C/C++ source before the module is compiled.
+    add_dependencies( ${_name} ${pyx_target_name})
+    target_link_libraries( ${_name} ${PYTHON_LIBRARIES} )
+  endif()
+endfunction()
+
+include( CMakeParseArguments )
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000000000..f6b0a4bee8316
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import glob +import os.path as osp +import re +import shutil +from Cython.Distutils import build_ext as _build_ext +import Cython + +import sys + +import pkg_resources +from setuptools import setup + +import os + +from os.path import join as pjoin + +from distutils.command.clean import clean as _clean +from distutils import sysconfig + +# Check if we're running 64-bit Python +is_64_bit = sys.maxsize > 2**32 + +# Check if this is a debug build of Python. +if hasattr(sys, 'gettotalrefcount'): + build_type = 'Debug' +else: + build_type = 'Release' + +if Cython.__version__ < '0.19.1': + raise Exception('Please upgrade to Cython 0.19.1 or newer') + +MAJOR = 0 +MINOR = 1 +MICRO = 0 +VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) + + +class clean(_clean): + + def run(self): + _clean.run(self) + for x in []: + try: + os.remove(x) + except OSError: + pass + + +class build_ext(_build_ext): + + def build_extensions(self): + numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') + + for ext in self.extensions: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) + _build_ext.build_extensions(self) + + def run(self): + self._run_cmake() + _build_ext.run(self) + + # adapted from cmake_build_ext in dynd-python + # github.com/libdynd/dynd-python + + description = "Build the C-extensions for arrow" + user_options = ([('extra-cmake-args=', None, + 'extra arguments for CMake')] + + _build_ext.user_options) + + def initialize_options(self): + _build_ext.initialize_options(self) + self.extra_cmake_args = '' + + def _run_cmake(self): + # The directory containing this setup.py + source = osp.dirname(osp.abspath(__file__)) + + # The staging directory for the module being built + build_temp = pjoin(os.getcwd(), self.build_temp) + + # Change to the build directory + saved_cwd = os.getcwd() + if not os.path.isdir(self.build_temp): + self.mkpath(self.build_temp) + os.chdir(self.build_temp) + + # Detect if we built 
elsewhere
+        if os.path.isfile('CMakeCache.txt'):
+            cachefile = open('CMakeCache.txt', 'r')
+            cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)',
+                                 cachefile.read()).group(1)
+            cachefile.close()
+            if (cachedir != build_temp):
+                return
+
+        pyexe_option = '-DPYTHON_EXECUTABLE=%s' % sys.executable
+        static_lib_option = ''
+        build_tests_option = ''
+
+        if sys.platform != 'win32':
+            cmake_command = ['cmake', self.extra_cmake_args, pyexe_option,
+                             build_tests_option,
+                             static_lib_option, source]
+
+            self.spawn(cmake_command)
+            self.spawn(['make'])
+        else:
+            import shlex
+            cmake_generator = 'Visual Studio 14 2015'
+            if is_64_bit:
+                cmake_generator += ' Win64'
+            # Generate the build files
+            extra_cmake_args = shlex.split(self.extra_cmake_args)
+            cmake_command = (['cmake'] + extra_cmake_args +
+                             [source, pyexe_option,
+                              static_lib_option,
+                              build_tests_option,
+                              '-G', cmake_generator])
+            if "-G" in self.extra_cmake_args:
+                cmake_command = cmake_command[:-2]
+
+            self.spawn(cmake_command)
+            # Do the build
+            self.spawn(['cmake', '--build', '.', '--config', build_type])
+
+        if self.inplace:
+            # a bit hacky
+            build_lib = saved_cwd
+        else:
+            build_lib = pjoin(os.getcwd(), self.build_lib)
+
+        # Move the built libpyarrow library to the place expected by the Python
+        # build
+        if sys.platform != 'win32':
+            name, = glob.glob('libpyarrow.*')
+            try:
+                os.makedirs(pjoin(build_lib, 'arrow'))
+            except OSError:
+                pass
+            shutil.move(name, pjoin(build_lib, 'arrow', name))
+        else:
+            shutil.move(pjoin(build_type, 'pyarrow.dll'),
+                        pjoin(build_lib, 'arrow', 'pyarrow.dll'))
+
+        # Move the built C-extension to the place expected by the Python build
+        self._found_names = []
+        for name in self.get_cmake_cython_names():
+            built_path = self.get_ext_built(name)
+            if not os.path.exists(built_path):
+                print(built_path)
+                raise RuntimeError('libpyarrow C-extension failed to build:',
+                                   os.path.abspath(built_path))
+
+            ext_path = pjoin(build_lib, self._get_cmake_ext_path(name))
+            if os.path.exists(ext_path):
+                os.remove(ext_path)
+            self.mkpath(os.path.dirname(ext_path))
+            print('Moving built libpyarrow C-extension', built_path,
+                  'to build path', ext_path)
+            shutil.move(self.get_ext_built(name), ext_path)
+            self._found_names.append(name)
+
+        os.chdir(saved_cwd)
+
+    def _get_inplace_dir(self):
+        pass  # stub: performs no work and returns None
+
+    def _get_cmake_ext_path(self, name):
+        # Get the package directory from build_py
+        build_py = self.get_finalized_command('build_py')
+        package_dir = build_py.get_package_dir('arrow')
+        # This is the name of the arrow C-extension
+        suffix = sysconfig.get_config_var('EXT_SUFFIX')
+        if suffix is None:
+            suffix = sysconfig.get_config_var('SO')
+        filename = name + suffix
+        return pjoin(package_dir, filename)
+
+    def get_ext_built(self, name):
+        # Same suffix lookup as _get_cmake_ext_path: 'SO' alone may be unset
+        suffix = (sysconfig.get_config_var('EXT_SUFFIX') or
+                  sysconfig.get_config_var('SO'))
+        if sys.platform == 'win32':
+            head, tail = os.path.split(name)
+            return pjoin(head, build_type, tail + suffix)
+        return name + suffix
+
+    def get_cmake_cython_names(self):
+        return ['config', 'parquet']  # base names of the CMake-built modules
+
+    def get_names(self):
+        return self._found_names  # names appended as extensions are moved
+
+    def get_outputs(self):
+        # Just the C extensions
+        cmake_exts = [self._get_cmake_ext_path(name)
+                      for name in self.get_names()]
+        regular_exts = _build_ext.get_outputs(self)
+        return regular_exts + cmake_exts
+
+
+extensions = []
+
+DESC = """\
+Python library for Apache Arrow"""
+
+setup(
+    name="arrow",
+    packages=['arrow', 'arrow.tests'],
+    version=VERSION,
+    package_data={'arrow': ['*.pxd', '*.pyx']},
+    ext_modules=extensions,
+    cmdclass={
+        'clean': clean,
+        'build_ext': build_ext
+    },
+    install_requires=['cython >= 0.21'],
+    description=DESC,
+    license='Apache License, Version 2.0',
+    maintainer="Apache Arrow Developers",
+    maintainer_email="dev@arrow.apache.org",
+    test_suite="arrow.tests"
+)
diff --git a/python/src/pyarrow/CMakeLists.txt b/python/src/pyarrow/CMakeLists.txt
new file mode 100644
index 0000000000000..e20c3238b5f78
--- /dev/null
+++ b/python/src/pyarrow/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#######################################
+# Unit tests
+#######################################
diff --git a/python/src/pyarrow/api.h b/python/src/pyarrow/api.h
new file mode 100644
index 0000000000000..c2285de77bf10
--- /dev/null
+++ b/python/src/pyarrow/api.h
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_API_H
+#define PYARROW_API_H
+// No declarations yet: this header currently holds only its include guard.
+#endif // PYARROW_API_H
diff --git a/python/src/pyarrow/init.cc b/python/src/pyarrow/init.cc
new file mode 100644
index 0000000000000..c36f413725532
--- /dev/null
+++ b/python/src/pyarrow/init.cc
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "pyarrow/init.h"
+
+namespace arrow {
+
+namespace py {
+// Defined with an empty body: calling it currently does nothing.
+void pyarrow_init() {
+}
+
+} // namespace py
+
+} // namespace arrow
diff --git a/python/src/pyarrow/init.h b/python/src/pyarrow/init.h
new file mode 100644
index 0000000000000..1fc9f10102696
--- /dev/null
+++ b/python/src/pyarrow/init.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_INIT_H
+#define PYARROW_INIT_H
+
+namespace arrow {
+
+namespace py {
+// Declared here, defined in init.cc, where its body is currently empty.
+void pyarrow_init();
+
+} // namespace py
+
+} // namespace arrow
+
+#endif // PYARROW_INIT_H
diff --git a/python/src/pyarrow/util/CMakeLists.txt b/python/src/pyarrow/util/CMakeLists.txt
new file mode 100644
index 0000000000000..60dc80eb38cb6
--- /dev/null
+++ b/python/src/pyarrow/util/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#######################################
+# pyarrow_util
+#######################################
+# Both lists are empty for now; add sources and link dependencies here.
+set(UTIL_SRCS
+)
+
+set(UTIL_LIBS
+)
+# No sources are listed yet; the linker language is set explicitly below.
+add_library(pyarrow_util STATIC
+  ${UTIL_SRCS}
+)
+target_link_libraries(pyarrow_util ${UTIL_LIBS})
+set_target_properties(pyarrow_util PROPERTIES LINKER_LANGUAGE CXX)
+
+#######################################
+# pyarrow_test_main
+#######################################
+# Library built from test_main.cc, which defines main() for the unit tests.
+add_library(pyarrow_test_main
+  test_main.cc)
+# NOTE(review): APPLE links gmock, other platforms gtest -- confirm intended.
+if(APPLE)
+  target_link_libraries(pyarrow_test_main
+    gmock
+    dl)
+  set_target_properties(pyarrow_test_main
+    PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+else()
+  target_link_libraries(pyarrow_test_main
+    gtest
+    pthread
+    dl
+  )
+endif()
diff --git a/python/src/pyarrow/util/test_main.cc b/python/src/pyarrow/util/test_main.cc
new file mode 100644
index 0000000000000..00139f36742ed
--- /dev/null
+++ b/python/src/pyarrow/util/test_main.cc
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+// Test-runner entry point: passes the command line to Google Test, then runs
+// the registered tests and returns the RUN_ALL_TESTS() result as exit status.
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+
+  return RUN_ALL_TESTS();
+}