From 6a6171ad111906258d36d0ff2696f939217fddd9 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 18 Jul 2023 18:29:16 +0900 Subject: [PATCH 01/12] GH-36730: [Python] Add support for Cython 3.0.0 --- python/pyarrow/includes/libarrow_flight.pxd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd index 34ba809438e2c..30adb20d0a702 100644 --- a/python/pyarrow/includes/libarrow_flight.pxd +++ b/python/pyarrow/includes/libarrow_flight.pxd @@ -172,7 +172,9 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: CResult[unique_ptr[CFlightInfo]] Next() cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing": - CSimpleFlightListing(vector[CFlightInfo]&& info) + # This doesn't work with Cython >= 3 + # CSimpleFlightListing(vector[CFlightInfo]&& info) + CSimpleFlightListing(const vector[CFlightInfo]& info) cdef cppclass CFlightPayload" arrow::flight::FlightPayload": shared_ptr[CBuffer] descriptor @@ -308,7 +310,10 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: cdef cppclass CCallHeaders" arrow::flight::CallHeaders": cppclass const_iterator: pair[c_string, c_string] operator*() + # For Cython < 3 const_iterator operator++() + # For Cython >= 3 + const_iterator operator++(int) bint operator==(const_iterator) bint operator!=(const_iterator) const_iterator cbegin() From ecad6bd7d61b7ff8e9df0950bada25e6a173ba22 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 19 Jul 2023 06:13:17 +0900 Subject: [PATCH 02/12] Add missing const --- python/pyarrow/includes/libarrow_flight.pxd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd index 30adb20d0a702..c0f54134fd56a 100644 --- a/python/pyarrow/includes/libarrow_flight.pxd +++ b/python/pyarrow/includes/libarrow_flight.pxd @@ -118,16 +118,16 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil: c_bool Equals(const CLocation& other) @staticmethod - CResult[CLocation] Parse(c_string& uri_string) + CResult[CLocation] Parse(const c_string& uri_string) @staticmethod - CResult[CLocation] ForGrpcTcp(c_string& host, int port) + CResult[CLocation] ForGrpcTcp(const c_string& host, int port) @staticmethod - CResult[CLocation] ForGrpcTls(c_string& host, int port) + CResult[CLocation] ForGrpcTls(const c_string& host, int port) @staticmethod - CResult[CLocation] ForGrpcUnix(c_string& path) + CResult[CLocation] ForGrpcUnix(const c_string& path) cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint": CFlightEndpoint() From 945ac7d209ec972231a368426766082daa905e06 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 19 Jul 2023 06:13:34 +0900 Subject: [PATCH 03/12] Suppress C4551 --- python/CMakeLists.txt | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 21a2ace5216bb..447120c86c62e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -168,37 +168,44 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") if(MSVC) # MSVC version of -Wno-return-type-c-linkage - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190") + string(APPEND CMAKE_CXX_FLAGS " /wd4190") # Cython generates some bitshift expressions that MSVC does not like in # __Pyx_PyFloat_DivideObjC - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293") + string(APPEND CMAKE_CXX_FLAGS " /wd4293") # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning # seem harmless, and probably not worth the effort of working around it - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4800") + string(APPEND CMAKE_CXX_FLAGS " /wd4800") # See https://github.com/cython/cython/issues/2731. Change introduced in # Cython 0.29.1 causes "unsafe use of type 'bool' in operation" - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4804") + string(APPEND CMAKE_CXX_FLAGS " /wd4804") + + # See https://github.com/cython/cython/issues/4445. + # + # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a + # "unused function" warning but the code emits another "function + # call missing argument list" warning. + string(APPEND CMAKE_CXX_FLAGS " /wd4551") else() # Enable perf and other tools to work properly - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") + string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer") # Suppress Cython warnings - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized") if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # Cython warnings in clang - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality") + string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand") + string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations") + string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized") # We have public Cython APIs which return C++ types, which are in an extern # "C" blog (no symbol mangling) and clang doesn't like this - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage") + string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage") endif() endif() From 978b47cd37c885ad07c07405c3ab23cf8347e987 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 05:47:50 +0900 Subject: [PATCH 04/12] Follow generator change --- python/pyarrow/ipc.pxi | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index a8398597fe6cd..f5e2fab701193 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -437,7 +437,11 @@ cdef class MessageReader(_Weakrefable): def __iter__(self): while True: - yield self.read_next_message() + try: + yield self.read_next_message() + except StopIteration: + # For Cython >= 3.0.0 + return def read_next_message(self): """ @@ -660,6 +664,7 @@ cdef class RecordBatchReader(_Weakrefable): try: yield self.read_next_batch() except StopIteration: + # For Cython >= 3.0.0 return @property From cb6f36d0688fd8de0b171db5298b936894e4f269 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 08:39:12 +0900 Subject: [PATCH 05/12] Unpin Cython --- .github/workflows/dev.yml | 2 +- ci/conda_env_python.txt | 2 +- dev/release/verify-release-candidate.sh | 2 +- python/pyproject.toml | 2 +- python/requirements-build.txt | 2 +- python/requirements-wheel-build.txt | 2 +- python/setup.py | 6 +++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 119d11d9a399a..7c2437f6edfb5 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython<3" setuptools six pytest jira + pip install cython setuptools six pytest jira - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 4ae5c3614a1dc..04f985c94bb2c 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -18,7 +18,7 @@ # don't add pandas here, because it is not a mandatory test dependency boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture cffi -cython<3 +cython cloudpickle fsspec hypothesis diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index ce31b497c1fab..8c5de9bda85aa 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -665,7 +665,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv "cython<3" numpy setuptools_scm setuptools || exit 1 + maybe_setup_virtualenv cython numpy setuptools_scm setuptools || exit 1 maybe_setup_conda --file ci/conda_env_python.txt || exit 1 if [ "${USE_CONDA}" -gt 0 ]; then diff --git a/python/pyproject.toml b/python/pyproject.toml index 7e61304585809..fe8c938a9ce4f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,7 +17,7 @@ [build-system] requires = [ - "cython >= 0.29.31,<3", + "cython >= 0.29.31", "oldest-supported-numpy>=0.14", "setuptools_scm", "setuptools >= 40.1.0", diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 6378d1b94e1bb..507e9081373e2 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,4 +1,4 @@ -cython>=0.29.31,<3 +cython>=0.29.31 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=38.6.0 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index e4f5243fbc2fe..6043d2ffb2c6e 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -1,4 +1,4 @@ -cython>=0.29.31,<3 +cython>=0.29.31 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=58 diff --git a/python/setup.py b/python/setup.py index dc529679c7f90..6958aec5a6876 100755 --- a/python/setup.py +++ b/python/setup.py @@ -40,9 +40,9 @@ # Check if we're running 64-bit Python is_64_bit = sys.maxsize > 2**32 -if Cython.__version__ < '0.29.31' or Cython.__version__ >= '3.0': +if Cython.__version__ < '0.29.31' raise Exception( - 'Please update your Cython version. Supported Cython >= 0.29.31, < 3.0') + 'Please update your Cython version. Supported Cython >= 0.29.31') setup_dir = os.path.abspath(os.path.dirname(__file__)) @@ -492,7 +492,7 @@ def has_ext_modules(foo): 'pyarrow/_generated_version.py'), 'version_scheme': guess_next_dev_version }, - setup_requires=['setuptools_scm', 'cython >= 0.29.31,<3'] + setup_requires, + setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires, install_requires=install_requires, tests_require=['pytest', 'pandas', 'hypothesis'], python_requires='>=3.8', From 7d9a1ed6d89addfad0c846bf4bfc98f934372509 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 08:45:44 +0900 Subject: [PATCH 06/12] Add a missing : --- python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index 6958aec5a6876..4c3386ea726b2 100755 --- a/python/setup.py +++ b/python/setup.py @@ -40,7 +40,7 @@ # Check if we're running 64-bit Python is_64_bit = sys.maxsize > 2**32 -if Cython.__version__ < '0.29.31' +if Cython.__version__ < '0.29.31': raise Exception( 'Please update your Cython version. Supported Cython >= 0.29.31') From 49b2e251bcc7a07e1623883ee6a3917a7792c8ea Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 16:37:51 +0900 Subject: [PATCH 07/12] Add missing NULL check --- python/pyarrow/_flight.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 6f5cd03cd56bf..64659805cc5d8 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -1625,7 +1625,9 @@ cdef class FlightClient(_Weakrefable): def close(self): """Close the client and disconnect.""" - check_flight_status(self.client.get().Close()) + client = self.client.get() + if client != NULL: + check_flight_status(client.Close()) def __del__(self): # Not ideal, but close() wasn't originally present so From 3ae02eefacd3bfe43de8f744f361da17c980b3a5 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 17:06:16 +0900 Subject: [PATCH 08/12] Don't use StopIteration in __iter__ --- python/pyarrow/_flight.pyx | 5 ++++- python/pyarrow/ipc.pxi | 2 -- python/pyarrow/scalar.pxi | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index 64659805cc5d8..b817cb87894b9 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -983,7 +983,10 @@ cdef class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): def __iter__(self): while True: - yield self.read_chunk() + try: + yield self.read_chunk() + except StopIteration: + return @property def schema(self): diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index f5e2fab701193..1336dac39d7a3 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -440,7 +440,6 @@ cdef class MessageReader(_Weakrefable): try: yield self.read_next_message() except StopIteration: - # For Cython >= 3.0.0 return def read_next_message(self): @@ -664,7 +663,6 @@ cdef class RecordBatchReader(_Weakrefable): try: yield self.read_next_batch() except StopIteration: - # For Cython >= 3.0.0 return @property diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index f438c8847bb02..b6fdc41f321ea 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -793,7 +793,7 @@ cdef class MapScalar(ListScalar): """ arr = self.values if array is None: - raise StopIteration + return for k, v in zip(arr.field('key'), arr.field('value')): yield (k.as_py(), v.as_py()) From e76f77a4cdb8ae2c467d64c64c2405bd8fcf3141 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 20 Jul 2023 17:11:32 +0900 Subject: [PATCH 09/12] Use __next__ --- python/pyarrow/_flight.pyx | 9 ++++----- python/pyarrow/ipc.pxi | 18 ++++++++---------- python/pyarrow/scalar.pxi | 7 +++++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx index b817cb87894b9..2d369fed85e3d 100644 --- a/python/pyarrow/_flight.pyx +++ b/python/pyarrow/_flight.pyx @@ -982,11 +982,10 @@ cdef class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): cdef shared_ptr[CMetadataRecordBatchReader] reader def __iter__(self): - while True: - try: - yield self.read_chunk() - except StopIteration: - return + return self + + def __next__(self): + return self.read_chunk() @property def schema(self): diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index 1336dac39d7a3..53e521fc11468 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -436,11 +436,10 @@ cdef class MessageReader(_Weakrefable): return result def __iter__(self): - while True: - try: - yield self.read_next_message() - except StopIteration: - return + return self + + def __next__(self): + return self.read_next_message() def read_next_message(self): """ @@ -659,11 +658,10 @@ cdef class RecordBatchReader(_Weakrefable): # cdef block is in lib.pxd def __iter__(self): - while True: - try: - yield self.read_next_batch() - except StopIteration: - return + return self + + def __next__(self): + return self.read_next_batch() @property def schema(self): diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index b6fdc41f321ea..6ccbef1df6454 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -791,9 +791,12 @@ cdef class MapScalar(ListScalar): """ Iterate over this element's values. """ + return self + + def __next__(self): arr = self.values - if array is None: - return + if arr is None: + raise StopIteration for k, v in zip(arr.field('key'), arr.field('value')): yield (k.as_py(), v.as_py()) From 113f90c2fb98af4d29b4ea48081b246b80433eeb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 21 Jul 2023 05:41:40 +0900 Subject: [PATCH 10/12] Don't use __next__ for MapScalar --- python/pyarrow/scalar.pxi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 6ccbef1df6454..00864540356f3 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -791,12 +791,9 @@ cdef class MapScalar(ListScalar): """ Iterate over this element's values. """ - return self - - def __next__(self): arr = self.values if arr is None: - raise StopIteration + return for k, v in zip(arr.field('key'), arr.field('value')): yield (k.as_py(), v.as_py()) From 001f562ebcbd024b3bfe6384b2fc0c0eaa4258a6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 21 Jul 2023 08:34:25 +0900 Subject: [PATCH 11/12] Fix table format --- docs/source/cpp/compute.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 55e29588129b8..b925b220f045d 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1621,7 +1621,7 @@ do not detect overflow. They are alsoavailable in an overflow-checking variant, suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when overflow is detected. -+------------------------+-------+-------------+-------------+--------------------------------+-------+ ++-------------------------+-------+-------------+-------------+--------------------------------+-------+ | Function name | Arity | Input types | Output type | Options class | Notes | +=========================+=======+=============+=============+================================+=======+ | cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeOptions` | \(1) | From 3e83b0058a347f866944a32dae685d74eec008ce Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 21 Jul 2023 16:33:33 +0900 Subject: [PATCH 12/12] Try --- python/pyarrow/_dataset_parquet.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 4ad0caec30798..738ac94d34753 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -741,7 +741,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): thrift_string_size_limit=self.thrift_string_size_limit, thrift_container_size_limit=self.thrift_container_size_limit, ) - return type(self)._reconstruct, (kwargs,) + return ParquetFragmentScanOptions._reconstruct, (kwargs,) cdef class ParquetFactoryOptions(_Weakrefable):