From ba774379bcb510cd84d5316ba78a9c4d008a8898 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 18 Jul 2023 18:29:16 +0900 Subject: [PATCH] GH-36730: [Python] Add support for Cython 3.0.0 --- .github/workflows/dev.yml | 2 +- ci/conda_env_python.txt | 2 +- dev/release/verify-release-candidate.sh | 2 +- python/CMakeLists.txt | 29 +++++++++++++-------- python/pyarrow/_flight.pyx | 10 ++++--- python/pyarrow/includes/libarrow_flight.pxd | 15 +++++++---- python/pyarrow/ipc.pxi | 15 ++++++----- python/pyarrow/scalar.pxi | 4 +-- python/pyproject.toml | 2 +- python/requirements-build.txt | 2 +- python/requirements-wheel-build.txt | 2 +- python/setup.py | 6 ++--- 12 files changed, 54 insertions(+), 37 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index cee3c74762c3c..e8fe565ace0cc 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython<3" setuptools six pytest jira + pip install cython setuptools six pytest jira - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 4ae5c3614a1dc..04f985c94bb2c 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -18,7 +18,7 @@ # don't add pandas here, because it is not a mandatory test dependency boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture cffi -cython<3 +cython cloudpickle fsspec hypothesis diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index ce31b497c1fab..8c5de9bda85aa 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -665,7 +665,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv "cython<3" numpy setuptools_scm setuptools || exit 1 + 
maybe_setup_virtualenv cython numpy setuptools_scm setuptools || exit 1 maybe_setup_conda --file ci/conda_env_python.txt || exit 1 if [ "${USE_CONDA}" -gt 0 ]; then 
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 242ba8448f4a6..29f8d2da72f3a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -168,37 +168,44 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") if(MSVC) # MSVC version of -Wno-return-type-c-linkage - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190") + string(APPEND CMAKE_CXX_FLAGS " /wd4190") # Cython generates some bitshift expressions that MSVC does not like in # __Pyx_PyFloat_DivideObjC - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293") + string(APPEND CMAKE_CXX_FLAGS " /wd4293") # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning # seem harmless, and probably not worth the effort of working around it - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4800") + string(APPEND CMAKE_CXX_FLAGS " /wd4800") # See https://github.com/cython/cython/issues/2731. Change introduced in # Cython 0.29.1 causes "unsafe use of type 'bool' in operation" - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4804") + string(APPEND CMAKE_CXX_FLAGS " /wd4804") + + # See https://github.com/cython/cython/issues/4445. 
+ # + # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a + # "unused function" warning but the code emits another "function + # call missing argument list" warning. + string(APPEND CMAKE_CXX_FLAGS " /wd4551") else() # Enable perf and other tools to work properly - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") + string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer") # Suppress Cython warnings - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized") if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # Cython warnings in clang - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality") + string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand") + string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations") + string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized") # We have public Cython APIs which return C++ types, which are in an extern # "C" blog (no symbol mangling) and clang doesn't like this - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage") + string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage") endif() endif() diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 42b221ed72a1b..79aa24e4ce8e3 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -988,8 +988,10 @@ cdef class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): cdef shared_ptr[CMetadataRecordBatchReader] reader def __iter__(self): - while True: - yield self.read_chunk() + return self + + def __next__(self): + return self.read_chunk() 
@property def schema(self): @@ -1699,7 +1701,9 @@ cdef class FlightClient(_Weakrefable): def close(self): """Close the client and disconnect.""" - check_flight_status(self.client.get().Close()) + client = self.client.get() + if client != NULL: + check_flight_status(client.Close()) def __del__(self): # Not ideal, but close() wasn't originally present so diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd index 4bddd2d080f5f..c4cf5830c4128 100644 --- a/python/pyarrow/includes/libarrow_flight.pxd +++ b/python/pyarrow/includes/libarrow_flight.pxd @@ -118,16 +118,16 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: c_bool Equals(const CLocation& other) @staticmethod - CResult[CLocation] Parse(c_string& uri_string) + CResult[CLocation] Parse(const c_string& uri_string) @staticmethod - CResult[CLocation] ForGrpcTcp(c_string& host, int port) + CResult[CLocation] ForGrpcTcp(const c_string& host, int port) @staticmethod - CResult[CLocation] ForGrpcTls(c_string& host, int port) + CResult[CLocation] ForGrpcTls(const c_string& host, int port) @staticmethod - CResult[CLocation] ForGrpcUnix(c_string& path) + CResult[CLocation] ForGrpcUnix(const c_string& path) cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint": CFlightEndpoint() @@ -172,7 +172,9 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: CResult[unique_ptr[CFlightInfo]] Next() cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing": - CSimpleFlightListing(vector[CFlightInfo]&& info) + # This doesn't work with Cython >= 3 + # CSimpleFlightListing(vector[CFlightInfo]&& info) + CSimpleFlightListing(const vector[CFlightInfo]& info) cdef cppclass CFlightPayload" arrow::flight::FlightPayload": shared_ptr[CBuffer] descriptor @@ -310,7 +312,10 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: cdef cppclass CCallHeaders" arrow::flight::CallHeaders": cppclass const_iterator: pair[c_string, c_string] 
operator*() + # For Cython < 3 const_iterator operator++() + # For Cython >= 3 + const_iterator operator++(int) bint operator==(const_iterator) bint operator!=(const_iterator) const_iterator cbegin() diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index a8398597fe6cd..53e521fc11468 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -436,8 +436,10 @@ cdef class MessageReader(_Weakrefable): return result def __iter__(self): - while True: - yield self.read_next_message() + return self + + def __next__(self): + return self.read_next_message() def read_next_message(self): """ @@ -656,11 +658,10 @@ cdef class RecordBatchReader(_Weakrefable): # cdef block is in lib.pxd def __iter__(self): - while True: - try: - yield self.read_next_batch() - except StopIteration: - return + return self + + def __next__(self): + return self.read_next_batch() @property def schema(self): diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index e07949c675524..9a66dc81226d4 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -819,8 +819,8 @@ cdef class MapScalar(ListScalar): Iterate over this element's values. 
""" arr = self.values - if array is None: - raise StopIteration + if arr is None: + return for k, v in zip(arr.field(self.type.key_field.name), arr.field(self.type.item_field.name)): yield (k.as_py(), v.as_py()) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7e61304585809..fe8c938a9ce4f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,7 +17,7 @@ [build-system] requires = [ - "cython >= 0.29.31,<3", + "cython >= 0.29.31", "oldest-supported-numpy>=0.14", "setuptools_scm", "setuptools >= 40.1.0", diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 6378d1b94e1bb..507e9081373e2 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,4 +1,4 @@ -cython>=0.29.31,<3 +cython>=0.29.31 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=38.6.0 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index e4f5243fbc2fe..6043d2ffb2c6e 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -1,4 +1,4 @@ -cython>=0.29.31,<3 +cython>=0.29.31 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=58 diff --git a/python/setup.py b/python/setup.py index abd9d03cfb17e..0a6fc861eb515 100755 --- a/python/setup.py +++ b/python/setup.py @@ -40,9 +40,9 @@ # Check if we're running 64-bit Python is_64_bit = sys.maxsize > 2**32 -if Cython.__version__ < '0.29.31' or Cython.__version__ >= '3.0': +if Cython.__version__ < '0.29.31': raise Exception( - 'Please update your Cython version. Supported Cython >= 0.29.31, < 3.0') + 'Please update your Cython version. 
Supported Cython >= 0.29.31') setup_dir = os.path.abspath(os.path.dirname(__file__)) @@ -492,7 +492,7 @@ def has_ext_modules(foo): 'pyarrow/_generated_version.py'), 'version_scheme': guess_next_dev_version }, - setup_requires=['setuptools_scm', 'cython >= 0.29.31,<3'] + setup_requires, + setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires, install_requires=install_requires, tests_require=['pytest', 'pandas', 'hypothesis'], python_requires='>=3.8',