From e4009e28e5b9567f67345abd9bb40a876a3204a6 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 1 Jul 2024 12:03:23 +0900 Subject: [PATCH 001/122] GH-41527: [CI][Dev] Remove unncessary requirements for six (#43087) ### Rationale for this change The `six` requirement is a compatibility library for Python 2 and 3. Since Arrow no longer supports Python 2, we need to remove `six` requirement from merge script ([Python compatibility](https://arrow.apache.org/docs/python/install.html#python-compatibility)). ### What changes are included in this PR? - Removing `six` requirement - Updating `merge_arrow_pr.py` - `six.string_types` to `str` - `six_binary_type` to `bytes` ### Are these changes tested? Tested by CI. ### Are there any user-facing changes? Maybe not. It is for developers. * GitHub Issue: #41527 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- .github/workflows/dev.yml | 2 +- dev/merge_arrow_pr.py | 7 ++----- dev/requirements_merge_arrow_pr.txt | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 1ea12b0a4d23d..5aec3638a8967 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -120,7 +120,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython>=0.29.31" setuptools six pytest jira setuptools-scm + pip install "cython>=0.29.31" setuptools pytest jira setuptools-scm - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 344d943fd87e1..6694d2373b8f1 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -44,9 +44,6 @@ import requests import getpass -from six.moves import input -import six - try: import jira.client import jira.exceptions @@ -99,7 +96,7 @@ def get_json(url, headers=None): def run_cmd(cmd): - if isinstance(cmd, six.string_types): + if isinstance(cmd, str): cmd = cmd.split(' ') try: @@ -113,7 +110,7 @@ def run_cmd(cmd): print('--------------') raise e - if isinstance(output, six.binary_type): + if isinstance(output, bytes): output = output.decode('utf-8') return output diff --git a/dev/requirements_merge_arrow_pr.txt b/dev/requirements_merge_arrow_pr.txt index 7ac17dc1b1933..99909e434a580 100644 --- a/dev/requirements_merge_arrow_pr.txt +++ b/dev/requirements_merge_arrow_pr.txt @@ -1,3 +1,2 @@ jira requests -six From de6ef72828ced141165fb94032f1ca2ee380e82c Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 1 Jul 2024 16:07:14 +1200 Subject: [PATCH 002/122] GH-43076: [C#] Upgrade Xunit and change how Python integration tests are skipped (#43091) ### Rationale for this change See #43076. The previous Xunit upgrade was reverted due to this breaking how the Python C Data Interface integration tests were skipped. It looks like this is unlikely to be fixed in xunit or xunit.skippablefact soon (see https://github.com/AArnott/Xunit.SkippableFact/issues/32), so I've refactored the tests to work around the issue. ### What changes are included in this PR? Re-update xunit to 2.8.1 and refactor the `CDataSchemaPythonTest` class construction so that skipping these tests when the `PYTHONNET_PYDLL` environment variable isn't set works again. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #43076 Authored-by: Adam Reeve Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests.csproj | 2 +- .../Apache.Arrow.Tests.csproj | 2 +- .../CDataInterfacePythonTests.cs | 34 +++++++++++++------ 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 65ca360c97814..bd97372d1021b 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 21ffe37cfa1af..5a5a92ccd2c7f 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 3b00525a1ae18..132f17fa212a5 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 71d7970f9ad7d..a3290e3be14ee 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -17,7 +17,7 @@ - + all runtime; build; native; contentfiles; analyzers diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 274434e4bab09..fee18d165cdbd 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -31,24 +31,19 @@ namespace Apache.Arrow.Tests { public class CDataSchemaPythonTest : IClassFixture { - class PythonNet : IDisposable + public class PythonNet : IDisposable { + public bool Initialized { get; } + public PythonNet() { - bool inCIJob = Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; - bool inVerificationJob = Environment.GetEnvironmentVariable("TEST_CSHARP") == "1"; bool pythonSet = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL") != null; - // We only skip if this is not in CI - if (inCIJob && !inVerificationJob && !pythonSet) - { - throw new Exception("PYTHONNET_PYDLL not set; skipping C Data Interface tests."); - } - else + if (!pythonSet) { - Skip.If(!pythonSet, "PYTHONNET_PYDLL not set; skipping C Data Interface tests."); + Initialized = false; + return; } - PythonEngine.Initialize(); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && @@ -57,6 +52,8 @@ public PythonNet() dynamic sys = Py.Import("sys"); sys.path.append(Path.Combine(Path.GetDirectoryName(Environment.GetEnvironmentVariable("PYTHONNET_PYDLL")), "DLLs")); } + + Initialized = true; } public void Dispose() @@ -65,6 +62,21 @@ public void Dispose() } } + public CDataSchemaPythonTest(PythonNet pythonNet) + { + if (!pythonNet.Initialized) + { + bool inCIJob = 
Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + bool inVerificationJob = Environment.GetEnvironmentVariable("TEST_CSHARP") == "1"; + + // Skip these tests if this is not in CI or is a verification job and PythonNet couldn't be initialized + Skip.If(inVerificationJob || !inCIJob, "PYTHONNET_PYDLL not set; skipping C Data Interface tests."); + + // Otherwise throw + throw new Exception("PYTHONNET_PYDLL not set; cannot run C Data Interface tests."); + } + } + private static Schema GetTestSchema() { using (Py.GIL()) From ae984f42d76470d03bfde09ed945ee4e814356b2 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 1 Jul 2024 14:08:28 +0900 Subject: [PATCH 003/122] MINOR: [Docs] Fix wrong list formatting for Integration Testing documentation (#43090) ### Rationale for this change Fix wrong list formatting in specification for Integration Testing ### What changes are included in this PR? Add new line to correct alignment. ### Are these changes tested? Yes. I have verified the changes by building the documentation. ### Are there any user-facing changes? Yes. the updated documentation will be visible to users. Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- docs/source/format/Integration.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index 436747989acf3..0ab5b832ad012 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -455,6 +455,7 @@ or ``DATA``. * ``VARIADIC_DATA_BUFFERS``: a JSON array of data buffers represented as hex encoded strings. * ``VIEWS``: a JSON array of encoded views, which are JSON objects with: + * ``SIZE``: an integer indicating the size of the view, * ``INLINED``: an encoded value (this field will be present if ``SIZE`` is smaller than 12, otherwise the next three fields will be present), From edfa343eeca008513f0300924380e1b187cc976b Mon Sep 17 00:00:00 2001 From: feik Date: Mon, 1 Jul 2024 03:12:46 -0400 Subject: [PATCH 004/122] GH-42168: [Python][Parquet] Pyarrow store decimal as integer (#42169) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR exposes the [store_decimal_as_integer](https://arrow.apache.org/docs/cpp/api/formats.html#_CPPv4N7parquet16WriterProperties7Builder31enable_store_decimal_as_integerEv) parquet writer property to pyarrow ### Rationale for this change This will allow storing fixed-point decimals as integers in the parquet format and take advantage of more efficient storage codecs ### What changes are included in this PR? Pyarrow parquet writer and related Cython ### Are these changes tested? Tests were included for the new parameter ### Are there any user-facing changes? 
Docstrings were updated * GitHub Issue: #42168 Lead-authored-by: feik Co-authored-by: Brian Kiefer Co-authored-by: Raúl Cumplido Signed-off-by: mwish --- python/pyarrow/_dataset_parquet.pyx | 2 + python/pyarrow/_parquet.pxd | 3 ++ python/pyarrow/_parquet.pyx | 19 +++++++- python/pyarrow/parquet/core.py | 21 ++++++++ python/pyarrow/tests/parquet/test_basic.py | 56 ++++++++++++++++++++++ 5 files changed, 99 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 4942336a12666..a7afd065b592e 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -613,6 +613,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): write_page_index=self._properties["write_page_index"], write_page_checksum=self._properties["write_page_checksum"], sorting_columns=self._properties["sorting_columns"], + store_decimal_as_integer=self._properties["store_decimal_as_integer"], ) def _set_arrow_properties(self): @@ -664,6 +665,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): encryption_config=None, write_page_checksum=False, sorting_columns=None, + store_decimal_as_integer=False, ) self._set_properties() diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 950980678d595..35d15227ee5dc 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -432,6 +432,8 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* disable_statistics() Builder* enable_statistics() Builder* enable_statistics(const c_string& path) + Builder* enable_store_decimal_as_integer() + Builder* disable_store_decimal_as_integer() Builder* data_pagesize(int64_t size) Builder* encoding(ParquetEncoding encoding) Builder* encoding(const c_string& path, @@ -595,6 +597,7 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_page_index=*, write_page_checksum=*, sorting_columns=*, + store_decimal_as_integer=*, ) except * diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index b3bb0f0655b64..6c5b0af826b4e 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1831,7 +1831,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( dictionary_pagesize_limit=None, write_page_index=False, write_page_checksum=False, - sorting_columns=None) except *: + sorting_columns=None, + store_decimal_as_integer=False) except *: + """General writer properties""" cdef: shared_ptr[WriterProperties] properties @@ -1942,6 +1944,16 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( "'use_byte_stream_split' cannot be passed" "together with 'column_encoding'") + # store_decimal_as_integer + + if isinstance(store_decimal_as_integer, bool): + if store_decimal_as_integer: + props.enable_store_decimal_as_integer() + else: + props.disable_store_decimal_as_integer() + else: + raise TypeError("'store_decimal_as_integer' must be a boolean") + # column_encoding # encoding map - encode individual columns @@ -2115,6 +2127,7 @@ cdef class ParquetWriter(_Weakrefable): int64_t write_batch_size int64_t dictionary_pagesize_limit object store_schema + object store_decimal_as_integer def __cinit__(self, where, Schema schema not None, use_dictionary=None, compression=None, version=None, @@ -2136,7 +2149,8 @@ cdef class ParquetWriter(_Weakrefable): store_schema=True, write_page_index=False, write_page_checksum=False, - sorting_columns=None): + sorting_columns=None, + store_decimal_as_integer=False): cdef: shared_ptr[WriterProperties] properties 
shared_ptr[ArrowWriterProperties] arrow_properties @@ -2170,6 +2184,7 @@ cdef class ParquetWriter(_Weakrefable): write_page_index=write_page_index, write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, + store_decimal_as_integer=store_decimal_as_integer, ) arrow_properties = _create_arrow_writer_properties( use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index bc7cc342aea7e..6ca6f7089e75c 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -873,6 +873,23 @@ def _sanitize_table(table, new_schema, flavor): Specify the sort order of the data being written. The writer does not sort the data nor does it verify that the data is sorted. The sort order is written to the row group metadata, which can then be used by readers. +store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. """ _parquet_writer_example_doc = """\ @@ -968,6 +985,7 @@ def __init__(self, where, schema, filesystem=None, write_page_index=False, write_page_checksum=False, sorting_columns=None, + store_decimal_as_integer=False, **options): if use_deprecated_int96_timestamps is None: # Use int96 timestamps for Spark @@ -1020,6 +1038,7 @@ def __init__(self, where, schema, filesystem=None, write_page_index=write_page_index, write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, + store_decimal_as_integer=store_decimal_as_integer, **options) self.is_open = True @@ -1873,6 +1892,7 @@ def write_table(table, where, row_group_size=None, version='2.6', write_page_index=False, write_page_checksum=False, sorting_columns=None, + store_decimal_as_integer=False, **kwargs): # Implementor's note: when adding keywords here / updating defaults, also # update it in write_to_dataset and _dataset_parquet.pyx ParquetFileWriteOptions @@ -1903,6 +1923,7 @@ def write_table(table, where, row_group_size=None, version='2.6', write_page_index=write_page_index, write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, + store_decimal_as_integer=store_decimal_as_integer, **kwargs) as writer: writer.write_table(table, row_group_size=row_group_size) except Exception: diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 56b967a0595b8..194af7415e863 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. 
+import os from collections import OrderedDict import io import warnings from shutil import copytree +from decimal import Decimal import numpy as np import pytest @@ -357,6 +359,60 @@ def test_byte_stream_split(): use_dictionary=False) +def test_store_decimal_as_integer(tempdir): + arr_decimal_1_9 = pa.array(list(map(Decimal, range(100))), + type=pa.decimal128(5, 2)) + arr_decimal_10_18 = pa.array(list(map(Decimal, range(100))), + type=pa.decimal128(16, 9)) + arr_decimal_gt18 = pa.array(list(map(Decimal, range(100))), + type=pa.decimal128(22, 2)) + arr_bool = pa.array([True, False] * 50) + data_decimal = [arr_decimal_1_9, arr_decimal_10_18, arr_decimal_gt18] + table = pa.Table.from_arrays(data_decimal, names=['a', 'b', 'c']) + + # Check with store_decimal_as_integer. + _check_roundtrip(table, + expected=table, + compression="gzip", + use_dictionary=False, + store_decimal_as_integer=True) + + # Check physical type in parquet schema + pqtestfile_path = os.path.join(tempdir, 'test.parquet') + pq.write_table(table, pqtestfile_path, + compression="gzip", + use_dictionary=False, + store_decimal_as_integer=True) + + pqtestfile = pq.ParquetFile(pqtestfile_path) + pqcol_decimal_1_9 = pqtestfile.schema.column(0) + pqcol_decimal_10_18 = pqtestfile.schema.column(1) + + assert pqcol_decimal_1_9.physical_type == 'INT32' + assert pqcol_decimal_10_18.physical_type == 'INT64' + + # Check with store_decimal_as_integer and delta-int encoding. + # DELTA_BINARY_PACKED requires parquet physical type to be INT64 or INT32 + _check_roundtrip(table, + expected=table, + compression="gzip", + use_dictionary=False, + store_decimal_as_integer=True, + column_encoding={ + 'a': 'DELTA_BINARY_PACKED', + 'b': 'DELTA_BINARY_PACKED' + }) + + # Check with mixed column types. + mixed_table = pa.Table.from_arrays( + [arr_decimal_1_9, arr_decimal_10_18, arr_decimal_gt18, arr_bool], + names=['a', 'b', 'c', 'd']) + _check_roundtrip(mixed_table, + expected=mixed_table, + use_dictionary=False, + store_decimal_as_integer=True) + + def test_column_encoding(): arr_float = pa.array(list(map(float, range(100)))) arr_int = pa.array(list(map(int, range(100)))) From e9f35ffc31f419870c82c67ad5b72c781f64fd8b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 1 Jul 2024 17:11:35 -0700 Subject: [PATCH 005/122] GH-42198: [C++] Fix GetRecordBatchPayload crashes for device data (#42199) ### Rationale for this change Ensuring that creating IPC payloads works correctly for non-CPU data by utilizing `CopyBufferSliceToCPU`. ### What changes are included in this PR? Adding calls to `CopyBufferSliceToCPU` to the Ipc Writer for base binary types and for list types, to avoid calls to `value_offset` in those cases. ### Are these changes tested? Yes. Tests are added to cuda_test.cc ### Are there any user-facing changes? No. 
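For illustration, the access pattern this change adopts looks roughly like the following sketch. It is not the applied patch: the helper name `ReadOffsetAt` is hypothetical, but the `MemoryManager::CopyBufferSliceToCPU` call mirrors the usage in the diff below. Instead of dereferencing the offsets buffer through `array.value_offset(i)`, which assumes CPU-addressable memory, the writer stages the single offset it needs into host memory first:

```cpp
#include <cstdint>
#include <memory>

#include "arrow/buffer.h"
#include "arrow/device.h"  // arrow::MemoryManager
#include "arrow/result.h"
#include "arrow/status.h"

// Sketch only: read one binary/list offset from a buffer that may live on a
// non-CPU device (e.g. CUDA). ReadOffsetAt is a hypothetical helper.
template <typename OffsetType>
arrow::Result<OffsetType> ReadOffsetAt(const std::shared_ptr<arrow::Buffer>& offsets,
                                       int64_t i) {
  OffsetType value{};
  // Copy sizeof(OffsetType) bytes at byte position i * sizeof(OffsetType) into
  // CPU memory, regardless of which device owns the buffer.
  ARROW_RETURN_NOT_OK(arrow::MemoryManager::CopyBufferSliceToCPU(
      offsets, i * static_cast<int64_t>(sizeof(OffsetType)), sizeof(OffsetType),
      reinterpret_cast<uint8_t*>(&value)));
  return value;
}
```

Copying one offset at a time costs a small device round trip per call, but it keeps the serializer correct for device-resident buffers without copying the whole offsets buffer to the host.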
* GitHub Issue: #42198 --- cpp/src/arrow/gpu/cuda_test.cc | 43 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/ipc/writer.cc | 28 ++++++++++++++++++---- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc index 4c450bf389919..4c9b961fa1465 100644 --- a/cpp/src/arrow/gpu/cuda_test.cc +++ b/cpp/src/arrow/gpu/cuda_test.cc @@ -679,6 +679,49 @@ TEST_F(TestCudaArrowIpc, BasicWriteRead) { CompareBatch(*batch, *cpu_batch); } +TEST_F(TestCudaArrowIpc, WriteIpcString) { + auto values = ArrayFromJSON(utf8(), R"(["foo", null, "quux"])"); + ASSERT_OK_AND_ASSIGN(auto values_device, values->CopyTo(mm_)); + auto batch = RecordBatch::Make(schema({field("vals", utf8())}), 3, + {values_device->data()}, DeviceAllocationType::kCUDA); + + ipc::IpcPayload payload; + ASSERT_OK( + ipc::GetRecordBatchPayload(*batch, ipc::IpcWriteOptions::Defaults(), &payload)); + + ASSERT_EQ(values_device->data()->buffers[0]->address(), + payload.body_buffers[0]->address()); + ASSERT_EQ(values_device->data()->buffers[1]->address(), + payload.body_buffers[1]->address()); +} + +TEST_F(TestCudaArrowIpc, WriteIpcList) { + auto values = + ArrayFromJSON(list(utf8()), R"([["foo", null], null, ["quux", "bar", "baz"]])"); + ASSERT_OK_AND_ASSIGN(auto values_device, values->CopyTo(mm_)); + auto batch = RecordBatch::Make(schema({field("vals", list(utf8()))}), 3, + {values_device->data()}, DeviceAllocationType::kCUDA); + + ipc::IpcPayload payload; + ASSERT_OK( + ipc::GetRecordBatchPayload(*batch, ipc::IpcWriteOptions::Defaults(), &payload)); + + ASSERT_EQ(values_device->data()->buffers[0]->address(), + payload.body_buffers[0]->address()); +} + +TEST_F(TestCudaArrowIpc, WriteIpcSlicedRecord) { + std::shared_ptr batch; + ASSERT_OK(ipc::test::MakeListRecordBatch(&batch)); + + ASSERT_OK_AND_ASSIGN(auto batch_device, batch->CopyTo(mm_)); + auto sliced_batch_device = batch_device->Slice(10); + + ipc::IpcPayload payload; + ASSERT_NOT_OK(ipc::GetRecordBatchPayload(*sliced_batch_device, + ipc::IpcWriteOptions::Defaults(), &payload)); +} + TEST_F(TestCudaArrowIpc, DictionaryWriteRead) { std::shared_ptr batch; ASSERT_OK(ipc::test::MakeDictionary(&batch)); diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 549fb34a2e880..f603e60c66555 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -154,6 +154,11 @@ class RecordBatchSerializer { return Status::CapacityError("Cannot write arrays larger than 2^31 - 1 in length"); } + if (arr.offset() != 0 && arr.device_type() != DeviceAllocationType::kCPU) { + // https://github.com/apache/arrow/issues/43029 + return Status::NotImplemented("Cannot compute null count for non-cpu sliced array"); + } + // push back all common elements field_nodes_.push_back({arr.length(), arr.null_count(), 0}); @@ -449,14 +454,22 @@ class RecordBatchSerializer { template enable_if_base_binary Visit(const T& array) { + using offset_type = typename T::offset_type; + std::shared_ptr value_offsets; RETURN_NOT_OK(GetZeroBasedValueOffsets(array, &value_offsets)); auto data = array.value_data(); int64_t total_data_bytes = 0; - if (value_offsets) { - total_data_bytes = array.value_offset(array.length()) - array.value_offset(0); + if (value_offsets && array.length() > 0) { + offset_type last_offset_value; + RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU( + value_offsets, array.length() * sizeof(offset_type), sizeof(offset_type), + reinterpret_cast(&last_offset_value))); + + total_data_bytes = last_offset_value; } + if 
(NeedTruncate(array.offset(), data.get(), total_data_bytes)) { // Slice the data buffer to include only the range we need now const int64_t start_offset = array.value_offset(0); @@ -495,8 +508,15 @@ class RecordBatchSerializer { offset_type values_offset = 0; offset_type values_length = 0; if (value_offsets) { - values_offset = array.value_offset(0); - values_length = array.value_offset(array.length()) - values_offset; + RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU( + array.value_offsets(), array.offset() * sizeof(offset_type), + sizeof(offset_type), reinterpret_cast(&values_offset))); + offset_type last_values_offset = 0; + RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU( + array.value_offsets(), (array.offset() + array.length()) * sizeof(offset_type), + sizeof(offset_type), reinterpret_cast(&last_values_offset))); + + values_length = last_values_offset - values_offset; } if (array.offset() != 0 || values_length < values->length()) { From 76de7c2774655969aa7471aec9c0b4af8a4ae3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 2 Jul 2024 03:04:28 +0200 Subject: [PATCH 006/122] GH-43059: [CI][Gandiva] Disable Python Gandiva tests on AlmaLinux 8 (#43093) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The newer version of LLVM on AlmaLinux 8 fails on the pyarrow.gandiva tests ### What changes are included in this PR? Temporarily remove Gandiva on Python checks for AlmaLinux 8. ### Are these changes tested? Via CI ### Are there any user-facing changes? No * GitHub Issue: #43059 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/tasks/python-wheels/github.linux.yml | 1 + dev/tasks/verify-rc/github.linux.amd64.docker.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index 0ff3c56b695eb..5c82bf74b30b7 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -67,6 +67,7 @@ jobs: ALMALINUX: "8" run: | archery docker run \ + -e ARROW_GANDIVA=OFF \ -e TEST_DEFAULT=0 \ -e TEST_PYARROW_VERSION={{ arrow.no_rc_version }} \ -e TEST_PYTHON_VERSIONS={{ python_version }} \ diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index 7a28ba705dd50..f2c0673314826 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -38,6 +38,9 @@ jobs: run: | archery docker run \ -e VERIFY_VERSION="{{ release|default("") }}" \ + {% if distro == 'almalinux' and target|upper == 'PYTHON' %} + -e ARROW_GANDIVA=OFF \ + {% endif %} -e VERIFY_RC="{{ rc|default("") }}" \ -e TEST_DEFAULT=0 \ -e TEST_{{ target|upper }}=1 \ From 5386b575fb39be81aa2ba62b07f5a5c4d6c1ebfc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:10:16 +0900 Subject: [PATCH 007/122] MINOR: [JS] Bump @swc/core from 1.5.24 to 1.6.6 in /js (#43102) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@ swc/core](https://github.com/swc-project/swc) from 1.5.24 to 1.6.6.
Changelog

Sourced from @swc/core's changelog.

[1.6.6] - 2024-06-29

Bug Fixes / Performance (entries truncated)

[1.6.4] - 2024-06-22

Features

  • (cli) Support generating .d.ts files (#9097) (e71b6d3)
  • (common) Add fallible methods to swc_common::SourceMap (#9090) (e423d1b)
  • (es/transform) Add experimental .d.ts generation (#9093) (51a2702)
  • (swc_core) Reexport swc_typescript (#9092) (a2645e2)
  • (typescript) Implement Isolated Declaration (#9086) (a855159)

Miscellaneous Tasks / Performance (entries truncated)

... (truncated)

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 138 ++++++++++++++++++++++++------------------------ 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/js/package.json b/js/package.json index ecb6d3a366f7e..09e70184619fe 100644 --- a/js/package.json +++ b/js/package.json @@ -67,7 +67,7 @@ "@rollup/plugin-alias": "5.1.0", "@rollup/plugin-node-resolve": "15.2.3", "@rollup/stream": "3.0.1", - "@swc/core": "1.5.24", + "@swc/core": "1.6.6", "@types/benchmark": "2.1.5", "@types/glob": "8.1.0", "@types/jest": "29.5.12", diff --git a/js/yarn.lock b/js/yarn.lock index 0cfcede67ffda..2110063afac0d 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1139,74 +1139,74 @@ dependencies: "@sinonjs/commons" "^3.0.0" -"@swc/core-darwin-arm64@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.5.24.tgz#71875695bc617e57c2d93352f48317b4c41e0240" - integrity sha512-M7oLOcC0sw+UTyAuL/9uyB9GeO4ZpaBbH76JSH6g1m0/yg7LYJZGRmplhDmwVSDAR5Fq4Sjoi1CksmmGkgihGA== - -"@swc/core-darwin-x64@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.5.24.tgz#6b4c3eb9b21ab50b7324a82c9497ffeb2e8e0a57" - integrity sha512-MfcFjGGYognpSBSos2pYUNYJSmqEhuw5ceGr6qAdME7ddbjGXliza4W6FggsM+JnWwpqa31+e7/R+GetW4WkaQ== - -"@swc/core-linux-arm-gnueabihf@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.5.24.tgz#5730ed6ad86afe4ee8df04ee6f21430daead186c" - integrity sha512-amI2pwtcWV3E/m/nf+AQtn1LWDzKLZyjCmWd3ms7QjEueWYrY8cU1Y4Wp7wNNsxIoPOi8zek1Uj2wwFD/pttNQ== - -"@swc/core-linux-arm64-gnu@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.5.24.tgz#0a2478e8601391aa88f82bfece1dbc60d27cbcfd" - integrity sha512-sTSvmqMmgT1ynH/nP75Pc51s+iT4crZagHBiDOf5cq+kudUYjda9lWMs7xkXB/TUKFHPCRK0HGunl8bkwiIbuw== - -"@swc/core-linux-arm64-musl@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.5.24.tgz#e0199092dc611ca75f8a92dcea17de44e38f3fbf" - integrity sha512-vd2/hfOBGbrX21FxsFdXCUaffjkHvlZkeE2UMRajdXifwv79jqOHIJg3jXG1F3ZrhCghCzirFts4tAZgcG8XWg== - -"@swc/core-linux-x64-gnu@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.5.24.tgz#1fe347c9f28457c593f2fda5b0d4904a2b105ecd" - integrity sha512-Zrdzi7NqzQxm2BvAG5KyOSBEggQ7ayrxh599AqqevJmsUXJ8o2nMiWQOBvgCGp7ye+Biz3pvZn1EnRzAp+TpUg== - -"@swc/core-linux-x64-musl@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.5.24.tgz#bf6ac583fac211d704d2d78cfd0b7bf751268f5e" - integrity sha512-1F8z9NRi52jdZQCGc5sflwYSctL6omxiVmIFVp8TC9nngjQKc00TtX/JC2Eo2HwvgupkFVl5YQJidAck9YtmJw== - -"@swc/core-win32-arm64-msvc@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.5.24.tgz#41b9faf4db69cc08a43c3a176df2a7b94d765637" - integrity sha512-cKpP7KvS6Xr0jFSTBXY53HZX/YfomK5EMQYpCVDOvfsZeYHN20sQSKXfpVLvA/q2igVt1zzy1XJcOhpJcgiKLg== - -"@swc/core-win32-ia32-msvc@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.5.24.tgz#e123ad00e3b28d567d3851a86697fb3c54ed817a" - integrity 
sha512-IoPWfi0iwqjZuf7gE223+B97/ZwkKbu7qL5KzGP7g3hJrGSKAvv7eC5Y9r2iKKtLKyv5R/T6Ho0kFR/usi7rHw== - -"@swc/core-win32-x64-msvc@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.5.24.tgz#21fb87b1981253039e6d45255e31a875f446e397" - integrity sha512-zHgF2k1uVJL8KIW+PnVz1To4a3Cz9THbh2z2lbehaF/gKHugH4c3djBozU4das1v35KOqf5jWIEviBLql2wDLQ== - -"@swc/core@1.5.24": - version "1.5.24" - resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.5.24.tgz#9ecb4601cb6a4fb19f227ec5fb59d07e23347dca" - integrity sha512-Eph9zvO4xvqWZGVzTdtdEJ0Vqf0VIML/o/e4Qd2RLOqtfgnlRi7avmMu5C0oqciJ0tk+hqdUKVUZ4JPoPaiGvQ== +"@swc/core-darwin-arm64@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.6.6.tgz#9488d50394cb08713c4321a940b48599c1c5e153" + integrity sha512-5DA8NUGECcbcK1YLKJwNDKqdtTYDVnkfDU1WvQSXq/rU+bjYCLtn5gCe8/yzL7ISXA6rwqPU1RDejhbNt4ARLQ== + +"@swc/core-darwin-x64@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.6.6.tgz#0b13ae43e1821fd447acfb789979c59bec2d0081" + integrity sha512-2nbh/RHpweNRsJiYDFk1KcX7UtaKgzzTNUjwtvK5cp0wWrpbXmPvdlWOx3yzwoiSASDFx78242JHHXCIOlEdsw== + +"@swc/core-linux-arm-gnueabihf@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.6.6.tgz#8dd3e76b887478cedd38d34f1de3c0b8f853d1b8" + integrity sha512-YgytuyUfR7b0z0SRHKV+ylr83HmgnROgeT7xryEkth6JGpAEHooCspQ4RrWTU8+WKJ7aXiZlGXPgybQ4TiS+TA== + +"@swc/core-linux-arm64-gnu@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.6.6.tgz#4d6369975d8a077f01cf9f6cee60402529ef67a6" + integrity sha512-yGwx9fddzEE0iURqRVwKBQ4IwRHE6hNhl15WliHpi/PcYhzmYkUIpcbRXjr0dssubXAVPVnx6+jZVDSbutvnfg== + +"@swc/core-linux-arm64-musl@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.6.6.tgz#5f0ec779e465242796277d07a3100bd5ccaec6ef" + integrity sha512-a6fMbqzSAsS5KCxFJyg1mD5kwN3ZFO8qQLyJ75R/htZP/eCt05jrhmOI7h2n+1HjiG332jLnZ9S8lkVE5O8Nqw== + +"@swc/core-linux-x64-gnu@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.6.6.tgz#30a92064e016f29b8fe85500fa4e71050c60ae74" + integrity sha512-hRGsUKNzzZle28YF0dYIpN0bt9PceR9LaVBq7x8+l9TAaDLFbgksSxcnU/ubTtsy+WsYSYGn+A83w3xWC0O8CQ== + +"@swc/core-linux-x64-musl@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.6.6.tgz#59a14e4a90644142b8c90972f5d29cfdde88de78" + integrity sha512-NokIUtFxJDVv3LzGeEtYMTV3j2dnGKLac59luTeq36DQLZdJQawQIdTbzzWl2jE7lxxTZme+dhsVOH9LxE3ceg== + +"@swc/core-win32-arm64-msvc@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.6.6.tgz#c81f6e9be1df76273100a422ceec887781244b5d" + integrity sha512-lzYdI4qb4k1dFG26yv+9Jaq/bUMAhgs/2JsrLncGjLof86+uj74wKYCQnbzKAsq2hDtS5DqnHnl+//J+miZfGA== + +"@swc/core-win32-ia32-msvc@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.6.6.tgz#0381e95282fdcf5f5f9731b56dad5c5e4da870ce" + integrity sha512-bvl7FMaXIJQ76WZU0ER4+RyfKIMGb6S2MgRkBhJOOp0i7VFx4WLOnrmMzaeoPJaJSkityVKAftfNh7NBzTIydQ== + +"@swc/core-win32-x64-msvc@1.6.6": + version "1.6.6" + resolved 
"https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.6.6.tgz#e511013aa3f71125d6385123469cdd30db141070" + integrity sha512-WAP0JoCTfgeYKgOeYJoJV4ZS0sQUmU3OwvXa2dYYtMLF7zsNqOiW4niU7QlThBHgUv/qNZm2p6ITEgh3w1cltw== + +"@swc/core@1.6.6": + version "1.6.6" + resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.6.6.tgz#fefaa3a6bdd1c6991a9ed67648bc058a0d29d4b8" + integrity sha512-sHfmIUPUXNrQTwFMVCY5V5Ena2GTOeaWjS2GFUpjLhAgVfP90OP67DWow7+cYrfFtqBdILHuWnjkTcd0+uPKlg== dependencies: "@swc/counter" "^0.1.3" - "@swc/types" "^0.1.7" + "@swc/types" "^0.1.9" optionalDependencies: - "@swc/core-darwin-arm64" "1.5.24" - "@swc/core-darwin-x64" "1.5.24" - "@swc/core-linux-arm-gnueabihf" "1.5.24" - "@swc/core-linux-arm64-gnu" "1.5.24" - "@swc/core-linux-arm64-musl" "1.5.24" - "@swc/core-linux-x64-gnu" "1.5.24" - "@swc/core-linux-x64-musl" "1.5.24" - "@swc/core-win32-arm64-msvc" "1.5.24" - "@swc/core-win32-ia32-msvc" "1.5.24" - "@swc/core-win32-x64-msvc" "1.5.24" + "@swc/core-darwin-arm64" "1.6.6" + "@swc/core-darwin-x64" "1.6.6" + "@swc/core-linux-arm-gnueabihf" "1.6.6" + "@swc/core-linux-arm64-gnu" "1.6.6" + "@swc/core-linux-arm64-musl" "1.6.6" + "@swc/core-linux-x64-gnu" "1.6.6" + "@swc/core-linux-x64-musl" "1.6.6" + "@swc/core-win32-arm64-msvc" "1.6.6" + "@swc/core-win32-ia32-msvc" "1.6.6" + "@swc/core-win32-x64-msvc" "1.6.6" "@swc/counter@^0.1.3": version "0.1.3" @@ -1220,10 +1220,10 @@ dependencies: tslib "^2.4.0" -"@swc/types@^0.1.7": - version "0.1.7" - resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.7.tgz#ea5d658cf460abff51507ca8d26e2d391bafb15e" - integrity sha512-scHWahbHF0eyj3JsxG9CFJgFdFNaVQCNAimBlT6PzS3n/HptxqREjsm4OH6AN3lYcffZYSPxXW8ua2BEHp0lJQ== +"@swc/types@^0.1.9": + version "0.1.9" + resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.9.tgz#e67cdcc2e4dd74a3cef4474b465eb398e7ae83e2" + integrity sha512-qKnCno++jzcJ4lM4NTfYifm1EFSCeIfKiAHAfkENZAV5Kl9PjJIyd2yeeVv6c/2CckuLyv2NmRC5pv6pm2WQBg== dependencies: "@swc/counter" "^0.1.3" From 5534b21a687a33a561dadc44db5a563675aa40df Mon Sep 17 00:00:00 2001 From: abandy Date: Mon, 1 Jul 2024 21:15:03 -0400 Subject: [PATCH 008/122] GH-43092: [Swift] Update ArrowData for Nested Types (allow children) (#43086) ### Rationale for this change Struct (Nested Types) need children property on ArrowData ### What changes are included in this PR? 
children property added to ArrowData and constructor updated (original constructor moved to a convenience constructor, so it must call the updated constructor but keeps the original behavior for existing types) * GitHub Issue: #43092 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowData.swift | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowData.swift b/swift/Arrow/Sources/Arrow/ArrowData.swift index 5e23e60c164b5..2728b9fc8b6fe 100644 --- a/swift/Arrow/Sources/Arrow/ArrowData.swift +++ b/swift/Arrow/Sources/Arrow/ArrowData.swift @@ -20,11 +20,18 @@ import Foundation public class ArrowData { public let type: ArrowType public let buffers: [ArrowBuffer] + public let children: [ArrowData] public let nullCount: UInt public let length: UInt public let stride: Int - init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws { + convenience init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws { + try self.init(arrowType, buffers: buffers, + children: [ArrowData](), nullCount: nullCount, + length: buffers[1].length) + } + + init(_ arrowType: ArrowType, buffers: [ArrowBuffer], children: [ArrowData], nullCount: UInt, length: UInt) throws { let infoType = arrowType.info switch infoType { case let .primitiveInfo(typeId): @@ -47,8 +54,9 @@ public class ArrowData { self.type = arrowType self.buffers = buffers + self.children = children self.nullCount = nullCount - self.length = buffers[1].length + self.length = length self.stride = arrowType.getStride() } From 9ec9c06e488ac5ba35dcc890891be54cd1a45eda Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:31:24 +0900 Subject: [PATCH 009/122] MINOR: [JS] Bump typedoc from 0.25.13 to 0.26.3 in /js (#43100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [typedoc](https://github.com/TypeStrong/TypeDoc) from 0.25.13 to 0.26.3.
Release notes

Sourced from typedoc's releases.

v0.26.3

Features

  • "On This Page" navigation now includes the page groups in collapsible sections, #2616.

Bug Fixes

  • mailto: links are no longer incorrectly recognized as relative paths, #2613.
  • Added @since to the default list of recognized tags, #2614.
  • Relative paths to directories will no longer cause the directory to be copied into the media directory, #2617.

v0.26.2

Features

  • Added a --suppressCommentWarningsInDeclarationFiles option to disable warnings from parsing comments in declaration files, #2611.
  • Improved comment discovery to more closely match TypeScript's discovery when getting comments for members of interfaces/classes, #2084, #2545.

Bug Fixes

  • The text non-highlighted language no longer causes warnings when rendering, #2610.
  • If a comment on a method is inherited from a parent class, and the child class does not use an @param tag from the parent, TypeDoc will no longer warn about the @param tag.

v0.26.1

Features

  • Improved Korean translation coverage, #2602.

Bug Fixes

  • Added @author to the default list of recognized tags, #2603.
  • Anchor links are no longer incorrectly checked for relative paths, #2604.
  • Fixed an issue where line numbers reported in error messages could be incorrect, #2605.
  • Fixed relative link detection for markdown links containing code in their label, #2606.
  • Fixed an issue with packages mode where TypeDoc would use (much) more memory than required, #2607.
  • TypeDoc will no longer crash when asked to render highlighted code for an unsupported language, #2609.
  • Fixed an issue where relatively-linked files would not be copied to the output directory in packages mode.
  • Fixed an issue where modifier tags were not applied to top level modules in packages mode.
  • Fixed an issue where excluded tags were not removed from top level modules in packages mode.
  • .jsonc configuration files are now properly read as JSONC, rather than being passed to require.

Thanks!

... (truncated)


Commits
  • 8932856 Update changelog for release
  • cb30e33 Bump version to 0.26.3
  • 317650e Update deps
  • da4f8fd Fix lint issue
  • 0304c26 Include page groups in page TOC
  • 9f0fb04 Do not copy relative paths to directories
  • 918d8aa Correct handling of mailto: links
  • 22fc83e Add @ since to the default list of recognized tags
  • e5ba1e8 Update changelog for release
  • f20ec83 Bump version to 0.26.2
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 108 +++++++++++++++++++++++++++++------------------- 2 files changed, 66 insertions(+), 44 deletions(-) diff --git a/js/package.json b/js/package.json index 09e70184619fe..03e8c0159155d 100644 --- a/js/package.json +++ b/js/package.json @@ -107,7 +107,7 @@ "rxjs": "7.8.1", "ts-jest": "29.1.4", "ts-node": "10.9.2", - "typedoc": "0.25.13", + "typedoc": "0.26.3", "typescript": "5.4.5", "vinyl-buffer": "1.0.1", "vinyl-named": "1.1.0", diff --git a/js/yarn.lock b/js/yarn.lock index 2110063afac0d..661161dfa05a2 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1120,6 +1120,11 @@ resolved "https://registry.yarnpkg.com/@rollup/stream/-/stream-3.0.1.tgz#485452d6f1016ac1b0513060f90ff02aaca3e1c0" integrity sha512-wdzoakLc9UiPOFa1k17ukfEtvQ0p7JuNFvOZT1DhO5Z5CrTf71An01U9+v+aebYcaLCwy3tLwpCSUF7K7xVN0A== +"@shikijs/core@1.10.0": + version "1.10.0" + resolved "https://registry.yarnpkg.com/@shikijs/core/-/core-1.10.0.tgz#ec3356ace7cb8b41f6baee0116f036fca85054cc" + integrity sha512-BZcr6FCmPfP6TXaekvujZcnkFmJHZ/Yglu97r/9VjzVndQA56/F4WjUKtJRQUnK59Wi7p/UTAOekMfCJv7jnYg== + "@sinclair/typebox@^0.27.8": version "0.27.8" resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.27.8.tgz#6667fac16c436b5434a387a34dedb013198f6e6e" @@ -1740,11 +1745,6 @@ ansi-regex@^6.0.1: resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.0.1.tgz#3183e38fae9a65d7cb5e53945cd5897d0260a06a" integrity sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA== -ansi-sequence-parser@^1.1.0: - version "1.1.1" - resolved "https://registry.yarnpkg.com/ansi-sequence-parser/-/ansi-sequence-parser-1.1.1.tgz#e0aa1cdcbc8f8bb0b5bca625aac41f5f056973cf" - integrity sha512-vJXt3yiaUL4UU546s3rPXlsry/RnM730G1+HkpKE012AN0sx1eOrxSu95oKDIonskeLTijMgqWZ3uDEe3NFvyg== - ansi-styles@^3.2.1: version "3.2.1" resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d" @@ -2839,6 +2839,11 @@ enhanced-resolve@^5.16.0: graceful-fs "^4.2.4" tapable "^2.2.0" +entities@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + errno@^0.1.3: version "0.1.8" resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.8.tgz#8bb3e9c7d463be4976ff888f76b4809ebc2e811f" @@ -4854,11 +4859,6 @@ json5@^2.2.3: resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283" integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== -jsonc-parser@^3.2.0: - version "3.2.1" - resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.2.1.tgz#031904571ccf929d7670ee8c547545081cb37f1a" - integrity sha512-AilxAyFOAcK5wA1+LeaySVBrHsGQvUFCDWXKpZjzaL0PqW+xfBOttn8GNtWKFWqneyMZj41MWF9Kl6iPWLwgOA== - jsonfile@^6.0.1: version "6.1.0" resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae" @@ -4980,6 +4980,13 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +linkify-it@^5.0.0: + version 
"5.0.0" + resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-5.0.0.tgz#9ef238bfa6dc70bd8e7f9572b52d369af569b421" + integrity sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ== + dependencies: + uc.micro "^2.0.0" + load-json-file@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-1.1.0.tgz#956905708d58b4bab4c2261b04f59f31c99374c0" @@ -5139,10 +5146,17 @@ map-visit@^1.0.0: dependencies: object-visit "^1.0.0" -marked@^4.3.0: - version "4.3.0" - resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3" - integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A== +markdown-it@^14.1.0: + version "14.1.0" + resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-14.1.0.tgz#3c3c5992883c633db4714ccb4d7b5935d98b7d45" + integrity sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg== + dependencies: + argparse "^2.0.1" + entities "^4.4.0" + linkify-it "^5.0.0" + mdurl "^2.0.0" + punycode.js "^2.3.1" + uc.micro "^2.1.0" matchdep@^2.0.0: version "2.0.0" @@ -5154,6 +5168,11 @@ matchdep@^2.0.0: resolve "^1.4.0" stack-trace "0.0.10" +mdurl@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-2.0.0.tgz#80676ec0433025dd3e17ee983d0fe8de5a2237e0" + integrity sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w== + memfs@4.9.2: version "4.9.2" resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.9.2.tgz#42e7b48207268dad8c9c48ea5d4952c5d3840433" @@ -5270,10 +5289,10 @@ minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: dependencies: brace-expansion "^1.1.7" -minimatch@^9.0.3, minimatch@^9.0.4: - version "9.0.4" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.4.tgz#8e49c731d1749cbec05050ee5145147b32496a51" - integrity sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw== +minimatch@^9.0.4, minimatch@^9.0.5: + version "9.0.5" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.5.tgz#d74f9dd6b57d83d8e98cfb82133b03978bc929e5" + integrity sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow== dependencies: brace-expansion "^2.0.1" @@ -5851,6 +5870,11 @@ pumpify@^1.3.5: inherits "^2.0.3" pump "^2.0.0" +punycode.js@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/punycode.js/-/punycode.js-2.3.1.tgz#6b53e56ad75588234e79f4affa90972c7dd8cdb7" + integrity sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA== + punycode@^2.1.0: version "2.3.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" @@ -6298,15 +6322,12 @@ shebang-regex@^3.0.0: resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== -shiki@^0.14.7: - version "0.14.7" - resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.14.7.tgz#c3c9e1853e9737845f1d2ef81b31bcfb07056d4e" - integrity sha512-dNPAPrxSc87ua2sKJ3H5dQ/6ZaY8RNnaAqK+t0eG7p0Soi2ydiqbGOTaZCqaYvA/uZYfS1LJnemt3Q+mSfcPCg== +shiki@^1.9.1: + version "1.10.0" + resolved "https://registry.yarnpkg.com/shiki/-/shiki-1.10.0.tgz#304ab080a12458abc78eb0cb83eb0f7ace546215" + integrity 
sha512-YD2sXQ+TMD/F9BimV9Jn0wj35pqOvywvOG/3PB6hGHyGKlM7TJ9tyJ02jOb2kF8F0HfJwKNYrh3sW7jEcuRlXA== dependencies: - ansi-sequence-parser "^1.1.0" - jsonc-parser "^3.2.0" - vscode-oniguruma "^1.7.0" - vscode-textmate "^8.0.0" + "@shikijs/core" "1.10.0" signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7: version "3.0.7" @@ -6973,15 +6994,16 @@ typedarray@^0.0.6: resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777" integrity sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA== -typedoc@0.25.13: - version "0.25.13" - resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.25.13.tgz#9a98819e3b2d155a6d78589b46fa4c03768f0922" - integrity sha512-pQqiwiJ+Z4pigfOnnysObszLiU3mVLWAExSPf+Mu06G/qsc3wzbuM56SZQvONhHLncLUhYzOVkjFFpFfL5AzhQ== +typedoc@0.26.3: + version "0.26.3" + resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.26.3.tgz#723b2c4ca5dd1d9baf43d6a5a1f4d640ba4207a8" + integrity sha512-6d2Sw9disvvpdk4K7VNjKr5/3hzijtfQVHRthhDqJgnhMHy1wQz4yPMJVKXElvnZhFr0nkzo+GzjXDTRV5yLpg== dependencies: lunr "^2.3.9" - marked "^4.3.0" - minimatch "^9.0.3" - shiki "^0.14.7" + markdown-it "^14.1.0" + minimatch "^9.0.5" + shiki "^1.9.1" + yaml "^2.4.5" typescript@5.4.5: version "5.4.5" @@ -6998,6 +7020,11 @@ typical@^7.1.1: resolved "https://registry.yarnpkg.com/typical/-/typical-7.1.1.tgz#ba177ab7ab103b78534463ffa4c0c9754523ac1f" integrity sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA== +uc.micro@^2.0.0, uc.micro@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-2.1.0.tgz#f8d3f7d0ec4c3dea35a7e3c8efa4cb8b45c9e7ee" + integrity sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A== + unc-path-regex@^0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" @@ -7218,16 +7245,6 @@ vinyl@^3.0.0: replace-ext "^2.0.0" teex "^1.0.1" -vscode-oniguruma@^1.7.0: - version "1.7.0" - resolved "https://registry.yarnpkg.com/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz#439bfad8fe71abd7798338d1cd3dc53a8beea94b" - integrity sha512-L9WMGRfrjOhgHSdOYgCt/yRMsXzLDJSL7BPrOZt73gU0iWO4mpqzqQzOz5srxqTvMBaR0XZTSrVWo4j55Rc6cA== - -vscode-textmate@^8.0.0: - version "8.0.0" - resolved "https://registry.yarnpkg.com/vscode-textmate/-/vscode-textmate-8.0.0.tgz#2c7a3b1163ef0441097e0b5d6389cd5504b59e5d" - integrity sha512-AFbieoL7a5LMqcnOF04ji+rpXadgOXnZsxQr//r83kLPr7biP7am3g9zbaZIaBGwBRWeSvoMD4mgPdX3e4NWBg== - walker@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.8.tgz#bd498db477afe573dc04185f011d3ab8a8d7653f" @@ -7439,6 +7456,11 @@ yallist@^4.0.0: resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== +yaml@^2.4.5: + version "2.4.5" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.4.5.tgz#60630b206dd6d84df97003d33fc1ddf6296cca5e" + integrity sha512-aBx2bnqDzVOyNKfsysjA2ms5ZlnjSAW2eG3/L5G/CSujfjLJTJsEw1bGw8kCf04KodQWk1pxlGnZ56CRxiawmg== + yargs-parser@>=5.0.0-security.0, yargs-parser@^21.0.1, yargs-parser@^21.1.1: version "21.1.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" From 7e7ea784ee458dd13d1693e216c17c4a6a25c5fb Mon Sep 17 00:00:00 2001 
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:31:44 +0900 Subject: [PATCH 010/122] MINOR: [JS] Bump @typescript-eslint/parser from 7.12.0 to 7.14.1 in /js (#43103) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@ typescript-eslint/parser](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/parser) from 7.12.0 to 7.14.1.
Release notes

Sourced from @​typescript-eslint/parser's releases.

v7.14.1

7.14.1 (2024-06-24)

🚀 Features

  • support TypeScript 5.5 (#9397)
  • ast-spec: tighter types and documentation for declaration/* (#9211)

🩹 Fixes

  • keep warnAbountTSVersion in sync with package.json (#9400)
  • eslint-plugin: [no-extraneous-class] handle abstract members (#9367)
  • eslint-plugin: [prefer-nullish-coalescing] handle intersected primitive types (#9378)
  • eslint-plugin: [no-invalid-this] support AccessorProperty (#9411)
  • eslint-plugin: [prefer-nullish-coalescing] treat enums and literals as their underlying primitive types (#9376)
  • eslint-plugin: [prefer-nullish-coalescing] ensure ternary fix does not remove parens (#9380)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

v7.14.0

7.14.0 (2024-06-24)

We followed this up soon after with 7.14.1 - see the combined release notes here https://github.com/typescript-eslint/typescript-eslint/releases/tag/v7.14.1

You can read about our versioning strategy and releases on our website.

v7.13.1

7.13.1 (2024-06-17)

🩹 Fixes

  • eslint-plugin: [prefer-readonly] refine report locations (#8894)
  • eslint-plugin: [return-await] support explicit resource management (#9044)
  • eslint-plugin: [no-unsafe-member-access] differentiate a types-error any from a true any (#9291)

❤️ Thank You

... (truncated)

Changelog

Sourced from @​typescript-eslint/parser's changelog.

7.14.1 (2024-06-24)

This was a version bump only for parser to align it with other projects; there were no code changes.

You can read about our versioning strategy and releases on our website.

7.14.0 (2024-06-24)

🚀 Features

  • support TypeScript 5.5

❤️ Thank You

  • Brad Zacher
  • cm-ayf
  • Jake Bailey
  • James Zhan
  • Joshua Chen
  • yoshi2no

You can read about our versioning strategy and releases on our website.

7.13.1 (2024-06-17)

This was a version bump only for parser to align it with other projects; there were no code changes.

You can read about our versioning strategy and releases on our website.

7.13.0 (2024-06-10)

🚀 Features

  • parser, typescript-estree: export withoutProjectParserOptions utility

❤️ Thank You

  • Fotis Papadogeorgopoulos
  • Joshua Chen
  • Kirk Waiblinger
  • Tobiloba Adedeji
  • Vinccool96
  • YeonJuan

You can read about our versioning strategy and releases on our website.

Commits
  • b4fe94f chore(release): publish 7.14.1
  • dfc4469 chore(release): publish 7.14.0
  • 1212a8f chore(release): publish 7.13.1
  • 8a178ed chore(release): publish 7.13.0
  • c9a6dd9 feat(parser, typescript-estree): export withoutProjectParserOptions utility (...
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ typescript-eslint/parser&package-manager=npm_and_yarn&previous-version=7.12.0&new-version=7.14.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 53 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/js/package.json b/js/package.json index 03e8c0159155d..770cd5dce19b9 100644 --- a/js/package.json +++ b/js/package.json @@ -73,7 +73,7 @@ "@types/jest": "29.5.12", "@types/multistream": "4.1.3", "@typescript-eslint/eslint-plugin": "7.12.0", - "@typescript-eslint/parser": "7.12.0", + "@typescript-eslint/parser": "7.14.1", "async-done": "2.0.0", "benny": "3.7.1", "cross-env": "7.0.3", diff --git a/js/yarn.lock b/js/yarn.lock index 661161dfa05a2..c4064e2955f60 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1454,15 +1454,15 @@ natural-compare "^1.4.0" ts-api-utils "^1.3.0" -"@typescript-eslint/parser@7.12.0": - version "7.12.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.12.0.tgz#8761df3345528b35049353db80010b385719b1c3" - integrity sha512-dm/J2UDY3oV3TKius2OUZIFHsomQmpHtsV0FTh1WO8EKgHLQ1QCADUqscPgTpU+ih1e21FQSRjXckHn3txn6kQ== - dependencies: - "@typescript-eslint/scope-manager" "7.12.0" - "@typescript-eslint/types" "7.12.0" - "@typescript-eslint/typescript-estree" "7.12.0" - "@typescript-eslint/visitor-keys" "7.12.0" +"@typescript-eslint/parser@7.14.1": + version "7.14.1" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.14.1.tgz#13d97f357aed3c5719f259a6cc3d1a1f065d3692" + integrity sha512-8lKUOebNLcR0D7RvlcloOacTOWzOqemWEWkKSVpMZVF/XVcwjPR+3MD08QzbW9TCGJ+DwIc6zUSGZ9vd8cO1IA== + dependencies: + "@typescript-eslint/scope-manager" "7.14.1" + "@typescript-eslint/types" "7.14.1" + "@typescript-eslint/typescript-estree" "7.14.1" + "@typescript-eslint/visitor-keys" "7.14.1" debug "^4.3.4" "@typescript-eslint/scope-manager@7.12.0": @@ -1473,6 +1473,14 @@ "@typescript-eslint/types" "7.12.0" "@typescript-eslint/visitor-keys" "7.12.0" +"@typescript-eslint/scope-manager@7.14.1": + version "7.14.1" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.14.1.tgz#63de7a577bc6fe8ee6e412a5b85499f654b93ee5" + integrity sha512-gPrFSsoYcsffYXTOZ+hT7fyJr95rdVe4kGVX1ps/dJ+DfmlnjFN/GcMxXcVkeHDKqsq6uAcVaQaIi3cFffmAbA== + dependencies: + "@typescript-eslint/types" "7.14.1" + "@typescript-eslint/visitor-keys" "7.14.1" + "@typescript-eslint/type-utils@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.12.0.tgz#9dfaaa1972952f395ec5be4f5bbfc4d3cdc63908" @@ -1488,6 +1496,11 @@ resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981" integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg== +"@typescript-eslint/types@7.14.1": + version "7.14.1" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.14.1.tgz#a43a540dbe5df7f2a11269683d777fc50b4350aa" + integrity sha512-mL7zNEOQybo5R3AavY+Am7KLv8BorIv7HCYS5rKoNZKQD9tsfGUpO4KdAn3sSUvTiS4PQkr2+K0KJbxj8H9NDg== + "@typescript-eslint/typescript-estree@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9" @@ -1502,6 +1515,20 @@ semver "^7.6.0" ts-api-utils "^1.3.0" +"@typescript-eslint/typescript-estree@7.14.1": + version "7.14.1" + resolved 
"https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.14.1.tgz#ba7c9bac8744487749d19569e254d057754a1575" + integrity sha512-k5d0VuxViE2ulIO6FbxxSZaxqDVUyMbXcidC8rHvii0I56XZPv8cq+EhMns+d/EVIL41sMXqRbK3D10Oza1bbA== + dependencies: + "@typescript-eslint/types" "7.14.1" + "@typescript-eslint/visitor-keys" "7.14.1" + debug "^4.3.4" + globby "^11.1.0" + is-glob "^4.0.3" + minimatch "^9.0.4" + semver "^7.6.0" + ts-api-utils "^1.3.0" + "@typescript-eslint/utils@7.12.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.12.0.tgz#c6e58fd7f724cdccc848f71e388ad80cbdb95dd0" @@ -1520,6 +1547,14 @@ "@typescript-eslint/types" "7.12.0" eslint-visitor-keys "^3.4.3" +"@typescript-eslint/visitor-keys@7.14.1": + version "7.14.1" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.14.1.tgz#cc79b5ea154aea734b2a13b983670749f5742274" + integrity sha512-Crb+F75U1JAEtBeQGxSKwI60hZmmzaqA3z9sYsVm8X7W5cwLEm5bRe0/uXS6+MR/y8CVpKSR/ontIAIEPFcEkA== + dependencies: + "@typescript-eslint/types" "7.14.1" + eslint-visitor-keys "^3.4.3" + "@ungap/structured-clone@^1.2.0": version "1.2.0" resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" From e98ddb2a87b6117076233de8edbdfe11d82d7516 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:32:04 +0900 Subject: [PATCH 011/122] MINOR: [JS] Bump esbuild from 0.21.4 to 0.22.0 in /js (#43104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [esbuild](https://github.com/evanw/esbuild) from 0.21.4 to 0.22.0.
Release notes

Sourced from esbuild's releases.

v0.22.0

This release deliberately contains backwards-incompatible changes. To avoid automatically picking up releases like this, you should either be pinning the exact version of esbuild in your package.json file (recommended) or be using a version range syntax that only accepts patch upgrades such as ^0.21.0 or ~0.21.0. See npm's documentation about semver for more information.

  • Omit packages from bundles by default when targeting node (#1874, #2830, #2846, #2915, #3145, #3294, #3323, #3582, #3809, #3815)

    This breaking change is an experiment. People are commonly confused when using esbuild to bundle code for node (i.e. for --platform=node) because some packages may not be intended for bundlers, and may use node-specific features that don't work with a bundler. Even though esbuild's "getting started" instructions say to use --packages=external to work around this problem, many people don't read the documentation and don't do this, and are then confused when it doesn't work. So arguably this is a bad default behavior for esbuild to have if people keep tripping over this.

    With this release, esbuild will now omit packages from the bundle by default when the platform is node (i.e. the previous behavior of --packages=external is now the default in this case). Note that your dependencies must now be present on the file system when your bundle is run. If you don't want this behavior, you can do --packages=bundle to allow packages to be included in the bundle (i.e. the previous default behavior). Note that --packages=bundle doesn't mean all packages are bundled, just that packages are allowed to be bundled. You can still exclude individual packages from the bundle using --external: even when --packages=bundle is present. (A JS-API sketch of these options appears after these notes.)

    The --packages= setting considers all import paths that "look like" package imports in the original source code to be package imports. Specifically import paths that don't start with a path segment of / or . or .. are considered to be package imports. The only two exceptions to this rule are subpath imports (which start with a # character) and TypeScript path remappings via paths and/or baseUrl in tsconfig.json (which are applied first).

  • Drop support for older platforms (#3802)

    This release drops support for the following operating systems:

    • Windows 7
    • Windows 8
    • Windows Server 2008
    • Windows Server 2012

    This is because the Go programming language dropped support for these operating system versions in Go 1.21, and this release updates esbuild from Go 1.20 to Go 1.22.

    Note that this only affects the binary esbuild executables that are published to the esbuild npm package. It's still possible to compile esbuild's source code for these older operating systems. If you need to, you can compile esbuild for yourself using an older version of the Go compiler (before Go version 1.21). That might look something like this:

    git clone https://github.com/evanw/esbuild.git
    cd esbuild
    go build ./cmd/esbuild
    ./esbuild.exe --version
    

    In addition, this release increases the minimum required node version for esbuild's JavaScript API from node 12 to node 18. Node 18 is the oldest version of node that is still being supported (see node's release schedule for more information). This increase is because of an incompatibility between the JavaScript that the Go compiler generates for the esbuild-wasm package and versions of node before node 17.4 (specifically the crypto.getRandomValues function).

  • Update await using behavior to match TypeScript

    TypeScript 5.5 subtly changes the way await using behaves. This release updates esbuild to match these changes in TypeScript. You can read more about these changes in microsoft/TypeScript#58624.

  • Allow es2024 as a target environment

    The ECMAScript 2024 specification was just approved, so it has been added to esbuild as a possible compilation target. You can read more about the features that it adds here: https://2ality.com/2024/06/ecmascript-2024.html. The only addition that's relevant for esbuild is the regular expression /v flag. With --target=es2024, regular expressions that use the /v flag will now be passed through untransformed instead of being transformed into a call to new RegExp. (A second sketch after these notes demonstrates this.)

  • Publish binaries for OpenBSD on 64-bit ARM (#3665, #3674)

    With this release, you should now be able to install the esbuild npm package in OpenBSD on 64-bit ARM, such as on an Apple device with an M1 chip.

    This was contributed by @​ikmckenz.

  • Publish binaries for WASI (WebAssembly System Interface) preview 1 (#3300, #3779)

    The upcoming WASI (WebAssembly System Interface) standard is going to be a way to run WebAssembly outside of a JavaScript host environment. In this scenario you only need a .wasm file without any supporting JavaScript code. Instead of JavaScript providing the APIs for the host environment, the WASI standard specifies a "system interface" that WebAssembly code can access directly (e.g. for file system access).

... (truncated)
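
The flag changes above can also be exercised through esbuild's JS API. The following is a minimal sketch, not part of this PR: the entry point `src/app.ts`, the `fsevents` exclusion, and the output path are illustrative assumptions, and the option names used are the JS-API equivalents of the `--packages` and `--external:` flags described in the notes.

```ts
// build.mjs — sketch only; the file names below are invented for illustration.
import * as esbuild from 'esbuild';

await esbuild.build({
  entryPoints: ['src/app.ts'],
  bundle: true,
  platform: 'node',       // as of 0.22.0, platform 'node' defaults to packages: 'external'
  packages: 'bundle',     // opt back in to the pre-0.22 behavior of bundling packages
  external: ['fsevents'], // individual packages can still be excluded either way
  outfile: 'dist/app.js',
});

// Only import paths that "look like" package imports are affected:
// 'lodash' and '@scope/pkg' count, while './x', '../x', '/x', and
// subpath imports such as '#internal' do not.
```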
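Likewise, the es2024 target's effect on the regular-expression `/v` flag can be observed with the transform API. Another sketch under the same caveats; the sample source string is invented:

```ts
// With target es2024 the /v flag is passed through untransformed; with an
// older target, esbuild lowers the literal to a `new RegExp(...)` call.
import { transform } from 'esbuild';

const src = 'const re = /\\p{Letter}/v;';
const modern = await transform(src, { target: 'es2024' });
const legacy = await transform(src, { target: 'es2020' });
console.log(modern.code); // regex literal kept as written
console.log(legacy.code); // rewritten to use new RegExp
```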


Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=esbuild&package-manager=npm_and_yarn&previous-version=0.21.4&new-version=0.22.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 244 +++++++++++++++++++++++++----------------------- 2 files changed, 126 insertions(+), 120 deletions(-) diff --git a/js/package.json b/js/package.json index 770cd5dce19b9..ac49ec162549b 100644 --- a/js/package.json +++ b/js/package.json @@ -79,7 +79,7 @@ "cross-env": "7.0.3", "del": "7.1.0", "del-cli": "5.1.0", - "esbuild": "0.21.4", + "esbuild": "0.22.0", "esbuild-plugin-alias": "0.2.1", "eslint": "8.57.0", "eslint-plugin-jest": "28.5.0", diff --git a/js/yarn.lock b/js/yarn.lock index c4064e2955f60..2139c7b07c3bc 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -362,230 +362,235 @@ resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz#d1bc06aedb6936b3b6d313bf809a5a40387d2b7f" integrity sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA== -"@esbuild/aix-ppc64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.4.tgz#f83eb142df3ca7b49531c1ed680b81e484316508" - integrity sha512-Zrm+B33R4LWPLjDEVnEqt2+SLTATlru1q/xYKVn8oVTbiRBGmK2VIMoIYGJDGyftnGaC788IuzGFAlb7IQ0Y8A== +"@esbuild/aix-ppc64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.22.0.tgz#6ff1ec509335ffbaee3fc4a5a11373d6f029b2c4" + integrity sha512-uvQR2crZ/zgzSHDvdygHyNI+ze9zwS8mqz0YtGXotSqvEE0UkYE9s+FZKQNTt1VtT719mfP3vHrUdCpxBNQZhQ== "@esbuild/android-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz#7ad65a36cfdb7e0d429c353e00f680d737c2aed4" integrity sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA== -"@esbuild/android-arm64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.21.4.tgz#dd328039daccd6033b2d1e536c054914bfc92287" - integrity sha512-fYFnz+ObClJ3dNiITySBUx+oNalYUT18/AryMxfovLkYWbutXsct3Wz2ZWAcGGppp+RVVX5FiXeLYGi97umisA== +"@esbuild/android-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.22.0.tgz#a02ef8650fe5ce17807c9f3229a36d326d2b07ea" + integrity sha512-UKhPb3o2gAB/bfXcl58ZXTn1q2oVu1rEu/bKrCtmm+Nj5MKUbrOwR5WAixE2v+lk0amWuwPvhnPpBRLIGiq7ig== "@esbuild/android-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.19.12.tgz#b0c26536f37776162ca8bde25e42040c203f2824" integrity sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w== -"@esbuild/android-arm@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.21.4.tgz#76767a989720a97b206ea14c52af6e4589e48b0d" - integrity sha512-E7H/yTd8kGQfY4z9t3nRPk/hrhaCajfA3YSQSBrst8B+3uTcgsi8N+ZWYCaeIDsiVs6m65JPCaQN/DxBRclF3A== +"@esbuild/android-arm@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.22.0.tgz#dd26ec407db736eee0eb060195a43aa13f618013" + integrity sha512-PBnyP+r8vJE4ifxsWys9l+Mc2UY/yYZOpX82eoyGISXXb3dRr0M21v+s4fgRKWMFPMSf/iyowqPW/u7ScSUkjQ== "@esbuild/android-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.19.12.tgz#cb13e2211282012194d89bf3bfe7721273473b3d" integrity 
sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew== -"@esbuild/android-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.21.4.tgz#14a8ae3c35702d882086efb5a8f8d7b0038d8d35" - integrity sha512-mDqmlge3hFbEPbCWxp4fM6hqq7aZfLEHZAKGP9viq9wMUBVQx202aDIfc3l+d2cKhUJM741VrCXEzRFhPDKH3Q== +"@esbuild/android-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.22.0.tgz#f02771a20be264ccc22478dcc7de8f2bde858af8" + integrity sha512-IjTYtvIrjhR41Ijy2dDPgYjQHWG/x/A4KXYbs1fiU3efpRdoxMChK3oEZV6GPzVEzJqxFgcuBaiX1kwEvWUxSw== "@esbuild/darwin-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz#cbee41e988020d4b516e9d9e44dd29200996275e" integrity sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g== -"@esbuild/darwin-arm64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.21.4.tgz#7e735046005e4c12e9139e0bdd1fa6a754430d57" - integrity sha512-72eaIrDZDSiWqpmCzVaBD58c8ea8cw/U0fq/PPOTqE3c53D0xVMRt2ooIABZ6/wj99Y+h4ksT/+I+srCDLU9TA== +"@esbuild/darwin-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.22.0.tgz#d905f2b951aeba328dd02e3a09f86b5d4e5e6741" + integrity sha512-mqt+Go4y9wRvEz81bhKd9RpHsQR1LwU8Xm6jZRUV/xpM7cIQFbFH6wBCLPTNsdELBvfoHeumud7X78jQQJv2TA== "@esbuild/darwin-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz#e37d9633246d52aecf491ee916ece709f9d5f4cd" integrity sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A== -"@esbuild/darwin-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.21.4.tgz#db623553547a5fe3502a63aa88306e9023178482" - integrity sha512-uBsuwRMehGmw1JC7Vecu/upOjTsMhgahmDkWhGLWxIgUn2x/Y4tIwUZngsmVb6XyPSTXJYS4YiASKPcm9Zitag== +"@esbuild/darwin-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.22.0.tgz#d07b4fe501fe9985590285b2790039ed4743f86e" + integrity sha512-vTaTQ9OgYc3VTaWtOE5pSuDT6H3d/qSRFRfSBbnxFfzAvYoB3pqKXA0LEbi/oT8GUOEAutspfRMqPj2ezdFaMw== "@esbuild/freebsd-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz#1ee4d8b682ed363b08af74d1ea2b2b4dbba76487" integrity sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA== -"@esbuild/freebsd-arm64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.4.tgz#91cbad647c079bf932086fbd4749d7f563df67b8" - integrity sha512-8JfuSC6YMSAEIZIWNL3GtdUT5NhUA/CMUCpZdDRolUXNAXEE/Vbpe6qlGLpfThtY5NwXq8Hi4nJy4YfPh+TwAg== +"@esbuild/freebsd-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.22.0.tgz#4251e0a14716116f4fa7e22d908f47408b6c2fb5" + integrity sha512-0e1ZgoobJzaGnR4reD7I9rYZ7ttqdh1KPvJWnquUoDJhL0rYwdneeLailBzd2/4g/U5p4e5TIHEWa68NF2hFpQ== "@esbuild/freebsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz#37a693553d42ff77cd7126764b535fb6cc28a11c" integrity sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg== 
-"@esbuild/freebsd-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.21.4.tgz#723299b9859ccbe5532fecbadba3ac33019ba8e8" - integrity sha512-8d9y9eQhxv4ef7JmXny7591P/PYsDFc4+STaxC1GBv0tMyCdyWfXu2jBuqRsyhY8uL2HU8uPyscgE2KxCY9imQ== +"@esbuild/freebsd-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.22.0.tgz#7dbd35616a71f8a9b61a9435c5a79d87fc0b2f1a" + integrity sha512-BFgyYwlCwRWyPQJtkzqq2p6pJbiiWgp0P9PNf7a5FQ1itKY4czPuOMAlFVItirSmEpRPCeImuwePNScZS0pL5Q== "@esbuild/linux-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz#be9b145985ec6c57470e0e051d887b09dddb2d4b" integrity sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA== -"@esbuild/linux-arm64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.21.4.tgz#531743f861e1ef6e50b874d6c784cda37aa5e685" - integrity sha512-/GLD2orjNU50v9PcxNpYZi+y8dJ7e7/LhQukN3S4jNDXCKkyyiyAz9zDw3siZ7Eh1tRcnCHAo/WcqKMzmi4eMQ== +"@esbuild/linux-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.22.0.tgz#77cded446dd0c3b723d272e0243b3d9ddb3cb46e" + integrity sha512-V/K2rctCUgC0PCXpN7AqT4hoazXKgIYugFGu/myk2+pfe6jTW2guz/TBwq4cZ7ESqusR/IzkcQaBkcjquuBWsw== "@esbuild/linux-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz#207ecd982a8db95f7b5279207d0ff2331acf5eef" integrity sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w== -"@esbuild/linux-arm@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.21.4.tgz#1144b5654764960dd97d90ddf0893a9afc63ad91" - integrity sha512-2rqFFefpYmpMs+FWjkzSgXg5vViocqpq5a1PSRgT0AvSgxoXmGF17qfGAzKedg6wAwyM7UltrKVo9kxaJLMF/g== +"@esbuild/linux-arm@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.22.0.tgz#6587d3e423e09766ea997229827e292e7c4acd6f" + integrity sha512-KEMWiA9aGuPUD4BH5yjlhElLgaRXe+Eri6gKBoDazoPBTo1BXc/e6IW5FcJO9DoL19FBeCxgONyh95hLDNepIg== "@esbuild/linux-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz#d0d86b5ca1562523dc284a6723293a52d5860601" integrity sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA== -"@esbuild/linux-ia32@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.21.4.tgz#c81b6f2ed3308d3b75ccefb5ac63bc4cf3a9d2e9" - integrity sha512-pNftBl7m/tFG3t2m/tSjuYeWIffzwAZT9m08+9DPLizxVOsUl8DdFzn9HvJrTQwe3wvJnwTdl92AonY36w/25g== +"@esbuild/linux-ia32@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.22.0.tgz#2d06d7b4abc443e05a820ff50d4c2d98cc04c22f" + integrity sha512-r2ZZqkOMOrpUhzNwxI7uLAHIDwkfeqmTnrv1cjpL/rjllPWszgqmprd/om9oviKXUBpMqHbXmppvjAYgISb26Q== "@esbuild/linux-loong64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz#9a37f87fec4b8408e682b528391fa22afd952299" integrity sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA== -"@esbuild/linux-loong64@0.21.4": - version "0.21.4" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.21.4.tgz#87b6af7cd0f2551653955fc2dc465b7f4464af0a" - integrity sha512-cSD2gzCK5LuVX+hszzXQzlWya6c7hilO71L9h4KHwqI4qeqZ57bAtkgcC2YioXjsbfAv4lPn3qe3b00Zt+jIfQ== +"@esbuild/linux-loong64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.22.0.tgz#a3e7faabe9a046ac4557bc515ce0981cfe5a6e0f" + integrity sha512-qaowLrV/YOMAL2RfKQ4C/VaDzAuLDuylM2sd/LH+4OFirMl6CuDpRlCq4u49ZBaVV8pkI/Y+hTdiibvQRhojCA== "@esbuild/linux-mips64el@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz#4ddebd4e6eeba20b509d8e74c8e30d8ace0b89ec" integrity sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w== -"@esbuild/linux-mips64el@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.21.4.tgz#fec73cd39490a0c45d052bef03e011a0ad366c06" - integrity sha512-qtzAd3BJh7UdbiXCrg6npWLYU0YpufsV9XlufKhMhYMJGJCdfX/G6+PNd0+v877X1JG5VmjBLUiFB0o8EUSicA== +"@esbuild/linux-mips64el@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.22.0.tgz#3a2877a78f6719e5eed4cfdded5121c5ab9305a4" + integrity sha512-hgrezzjQTRxjkQ5k08J6rtZN5PNnkWx/Rz6Kmj9gnsdCAX1I4Dn4ZPqvFRkXo55Q3pnVQJBwbdtrTO7tMGtyVA== "@esbuild/linux-ppc64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz#adb67dadb73656849f63cd522f5ecb351dd8dee8" integrity sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg== -"@esbuild/linux-ppc64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.21.4.tgz#ea3b5e13b0fc8666bd4c6f7ea58bd1830f3e6e78" - integrity sha512-yB8AYzOTaL0D5+2a4xEy7OVvbcypvDR05MsB/VVPVA7nL4hc5w5Dyd/ddnayStDgJE59fAgNEOdLhBxjfx5+dg== +"@esbuild/linux-ppc64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.22.0.tgz#6609478066083e05cc1854a8b272daf62a7e944b" + integrity sha512-ewxg6FLLUio883XgSjfULEmDl3VPv/TYNnRprVAS3QeGFLdCYdx1tIudBcd7n9jIdk82v1Ajov4jx87qW7h9+g== "@esbuild/linux-riscv64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz#11bc0698bf0a2abf8727f1c7ace2112612c15adf" integrity sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg== -"@esbuild/linux-riscv64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.21.4.tgz#80d406f653fc6b193edaeb55ac88d4ac22c8f155" - integrity sha512-Y5AgOuVzPjQdgU59ramLoqSSiXddu7F3F+LI5hYy/d1UHN7K5oLzYBDZe23QmQJ9PIVUXwOdKJ/jZahPdxzm9w== +"@esbuild/linux-riscv64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.22.0.tgz#d786a89903cf98e8d34befe6a71c69562bb4ceac" + integrity sha512-Az5XbgSJC2lE8XK8pdcutsf9RgdafWdTpUK/+6uaDdfkviw/B4JCwAfh1qVeRWwOohwdsl4ywZrWBNWxwrPLFg== "@esbuild/linux-s390x@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz#e86fb8ffba7c5c92ba91fc3b27ed5a70196c3cc8" integrity sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg== -"@esbuild/linux-s390x@0.21.4": - version "0.21.4" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.21.4.tgz#9cbd26854b5b12cf22fb54c96cd1adffaf6ace6f" - integrity sha512-Iqc/l/FFwtt8FoTK9riYv9zQNms7B8u+vAI/rxKuN10HgQIXaPzKZc479lZ0x6+vKVQbu55GdpYpeNWzjOhgbA== +"@esbuild/linux-s390x@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.22.0.tgz#a7ab13ae163307ac615dac5ce7f60a6b0a067d59" + integrity sha512-8j4a2ChT9+V34NNNY9c/gMldutaJFmfMacTPq4KfNKwv2fitBCLYjee7c+Vxaha2nUhPK7cXcZpJtJ3+Y7ZdVQ== "@esbuild/linux-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz#5f37cfdc705aea687dfe5dfbec086a05acfe9c78" integrity sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg== -"@esbuild/linux-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.21.4.tgz#44dfe1c5cad855362c830c604dba97fbb16fc114" - integrity sha512-Td9jv782UMAFsuLZINfUpoF5mZIbAj+jv1YVtE58rFtfvoKRiKSkRGQfHTgKamLVT/fO7203bHa3wU122V/Bdg== +"@esbuild/linux-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.22.0.tgz#24949de431013354da1d8c29e53299798f8c27ef" + integrity sha512-JUQyOnpbAkkRFOk/AhsEemz5TfWN4FJZxVObUlnlNCbe7QBl61ZNfM4cwBXayQA6laMJMUcqLHaYQHAB6YQ95Q== "@esbuild/netbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz#29da566a75324e0d0dd7e47519ba2f7ef168657b" integrity sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA== -"@esbuild/netbsd-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.4.tgz#89b97d823e1cc4bf8c4e5dc8f76c8d6ceb1c87f3" - integrity sha512-Awn38oSXxsPMQxaV0Ipb7W/gxZtk5Tx3+W+rAPdZkyEhQ6968r9NvtkjhnhbEgWXYbgV+JEONJ6PcdBS+nlcpA== +"@esbuild/netbsd-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.22.0.tgz#bc3f51c41eaab89cf5fdb09d0c633affb39cb1a1" + integrity sha512-11PoCoHXo4HFNbLsXuMB6bpMPWGDiw7xETji6COdJss4SQZLvcgNoeSqWtATRm10Jj1uEHiaIk4N0PiN6x4Fcg== + +"@esbuild/openbsd-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-arm64/-/openbsd-arm64-0.22.0.tgz#7cb42e3a0d3da039d1a4b7ccbd0c19b0f71ae453" + integrity sha512-Ezlhu/YyITmXwKSB+Zu/QqD7cxrjrpiw85cc0Rbd3AWr2wsgp+dWbWOE8MqHaLW9NKMZvuL0DhbJbvzR7F6Zvg== "@esbuild/openbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz#306c0acbdb5a99c95be98bdd1d47c916e7dc3ff0" integrity sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw== -"@esbuild/openbsd-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.4.tgz#080715bb4981c326364320d7b56835608e2bd98d" - integrity sha512-IsUmQeCY0aU374R82fxIPu6vkOybWIMc3hVGZ3ChRwL9hA1TwY+tS0lgFWV5+F1+1ssuvvXt3HFqe8roCip8Hg== +"@esbuild/openbsd-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.22.0.tgz#194aa9915323962e9ea66c5a13ff3e1db272a683" + integrity sha512-ufjdW5tFJGUjlH9j/5cCE9lrwRffyZh+T4vYvoDKoYsC6IXbwaFeV/ENxeNXcxotF0P8CDzoICXVSbJaGBhkrw== "@esbuild/sunos-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz#0933eaab9af8b9b2c930236f62aae3fc593faf30" integrity 
sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA== -"@esbuild/sunos-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.21.4.tgz#8d838a8ac80e211536490108b72fb0091a811626" - integrity sha512-hsKhgZ4teLUaDA6FG/QIu2q0rI6I36tZVfM4DBZv3BG0mkMIdEnMbhc4xwLvLJSS22uWmaVkFkqWgIS0gPIm+A== +"@esbuild/sunos-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.22.0.tgz#2be9d2459ae181ebedb6470e4469349a27c4f060" + integrity sha512-zY6ly/AoSmKnmNTowDJsK5ehra153/5ZhqxNLfq9NRsTTltetr+yHHcQ4RW7QDqw4JC8A1uC1YmeSfK9NRcK1w== "@esbuild/win32-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz#773bdbaa1971b36db2f6560088639ccd1e6773ae" integrity sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A== -"@esbuild/win32-arm64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.21.4.tgz#94afb4c2ac89b0f09791606d6d93fdab322f81c8" - integrity sha512-UUfMgMoXPoA/bvGUNfUBFLCh0gt9dxZYIx9W4rfJr7+hKe5jxxHmfOK8YSH4qsHLLN4Ck8JZ+v7Q5fIm1huErg== +"@esbuild/win32-arm64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.22.0.tgz#6b4224f2d049c26f37026904210a4293e34c2747" + integrity sha512-Kml5F7tv/1Maam0pbbCrvkk9vj046dPej30kFzlhXnhuCtYYBP6FGy/cLbc5yUT1lkZznGLf2OvuvmLjscO5rw== "@esbuild/win32-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz#000516cad06354cc84a73f0943a4aa690ef6fd67" integrity sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ== -"@esbuild/win32-ia32@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.21.4.tgz#822085cd52f2f1dd90eabb59346ffa779c0bab83" - integrity sha512-yIxbspZb5kGCAHWm8dexALQ9en1IYDfErzjSEq1KzXFniHv019VT3mNtTK7t8qdy4TwT6QYHI9sEZabONHg+aw== +"@esbuild/win32-ia32@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.22.0.tgz#4a1184f6fd4a7594c4f1e68b1e649248534f7832" + integrity sha512-IOgwn+mYTM3RrcydP4Og5IpXh+ftN8oF+HELTXSmbWBlujuci4Qa3DTeO+LEErceisI7KUSfEIiX+WOUlpELkw== "@esbuild/win32-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz#c57c8afbb4054a3ab8317591a0b7320360b444ae" integrity sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA== -"@esbuild/win32-x64@0.21.4": - version "0.21.4" - resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.21.4.tgz#11ef0398f9abee161193461910a507ef0d4c0c32" - integrity sha512-sywLRD3UK/qRJt0oBwdpYLBibk7KiRfbswmWRDabuncQYSlf8aLEEUor/oP6KRz8KEG+HoiVLBhPRD5JWjS8Sg== +"@esbuild/win32-x64@0.22.0": + version "0.22.0" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.22.0.tgz#4b83e9449a205e7d94d5368035450fc1680fe525" + integrity sha512-4bDHJrk2WHBXJPhy1y80X7/5b5iZTZP3LGcKIlAP1J+KqZ4zQAPMLEzftGyjjfcKbA4JDlPt/+2R/F1ZTeRgrw== "@eslint-community/eslint-utils@^4.2.0", "@eslint-community/eslint-utils@^4.4.0": version "4.4.0" @@ -2952,34 +2957,35 @@ esbuild-plugin-alias@0.2.1: resolved "https://registry.yarnpkg.com/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb" integrity 
sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ== -esbuild@0.21.4: - version "0.21.4" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.21.4.tgz#ceb501def8edb12a5bfd9c55f3a96db698edf022" - integrity sha512-sFMcNNrj+Q0ZDolrp5pDhH0nRPN9hLIM3fRPwgbLYJeSHHgnXSnbV3xYgSVuOeLWH9c73VwmEverVzupIv5xuA== +esbuild@0.22.0: + version "0.22.0" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.22.0.tgz#9742e664aac9f61e2898f4c27bd4dd4272e6f661" + integrity sha512-zNYA6bFZsVnsU481FnGAQjLDW0Pl/8BGG7EvAp15RzUvGC+ME7hf1q7LvIfStEQBz/iEHuBJCYcOwPmNCf1Tlw== optionalDependencies: - "@esbuild/aix-ppc64" "0.21.4" - "@esbuild/android-arm" "0.21.4" - "@esbuild/android-arm64" "0.21.4" - "@esbuild/android-x64" "0.21.4" - "@esbuild/darwin-arm64" "0.21.4" - "@esbuild/darwin-x64" "0.21.4" - "@esbuild/freebsd-arm64" "0.21.4" - "@esbuild/freebsd-x64" "0.21.4" - "@esbuild/linux-arm" "0.21.4" - "@esbuild/linux-arm64" "0.21.4" - "@esbuild/linux-ia32" "0.21.4" - "@esbuild/linux-loong64" "0.21.4" - "@esbuild/linux-mips64el" "0.21.4" - "@esbuild/linux-ppc64" "0.21.4" - "@esbuild/linux-riscv64" "0.21.4" - "@esbuild/linux-s390x" "0.21.4" - "@esbuild/linux-x64" "0.21.4" - "@esbuild/netbsd-x64" "0.21.4" - "@esbuild/openbsd-x64" "0.21.4" - "@esbuild/sunos-x64" "0.21.4" - "@esbuild/win32-arm64" "0.21.4" - "@esbuild/win32-ia32" "0.21.4" - "@esbuild/win32-x64" "0.21.4" + "@esbuild/aix-ppc64" "0.22.0" + "@esbuild/android-arm" "0.22.0" + "@esbuild/android-arm64" "0.22.0" + "@esbuild/android-x64" "0.22.0" + "@esbuild/darwin-arm64" "0.22.0" + "@esbuild/darwin-x64" "0.22.0" + "@esbuild/freebsd-arm64" "0.22.0" + "@esbuild/freebsd-x64" "0.22.0" + "@esbuild/linux-arm" "0.22.0" + "@esbuild/linux-arm64" "0.22.0" + "@esbuild/linux-ia32" "0.22.0" + "@esbuild/linux-loong64" "0.22.0" + "@esbuild/linux-mips64el" "0.22.0" + "@esbuild/linux-ppc64" "0.22.0" + "@esbuild/linux-riscv64" "0.22.0" + "@esbuild/linux-s390x" "0.22.0" + "@esbuild/linux-x64" "0.22.0" + "@esbuild/netbsd-x64" "0.22.0" + "@esbuild/openbsd-arm64" "0.22.0" + "@esbuild/openbsd-x64" "0.22.0" + "@esbuild/sunos-x64" "0.22.0" + "@esbuild/win32-arm64" "0.22.0" + "@esbuild/win32-ia32" "0.22.0" + "@esbuild/win32-x64" "0.22.0" esbuild@^0.19.6: version "0.19.12" From 76cdafe78112e6492d3f7b5416775d39f5002072 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:32:40 +0900 Subject: [PATCH 012/122] MINOR: [JS] Bump @types/node from 20.14.1 to 20.14.9 in /js (#43107) Bumps [@ types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) from 20.14.1 to 20.14.9.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ types/node&package-manager=npm_and_yarn&previous-version=20.14.1&new-version=20.14.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index 2139c7b07c3bc..d98281537670b 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1396,9 +1396,9 @@ "@types/node" "*" "@types/node@*", "@types/node@>=13.7.4", "@types/node@^20.13.0": - version "20.14.1" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.1.tgz#2434dbcb1f039e31f2c0e9969da93f52cf6348f3" - integrity sha512-T2MzSGEu+ysB/FkWfqmhV3PLyQlowdptmmgD20C6QxsS8Fmv5SjpZ1ayXaEC0S21/h5UJ9iA6W/5vSNU5l00OA== + version "20.14.9" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.9.tgz#12e8e765ab27f8c421a1820c99f5f313a933b420" + integrity sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg== dependencies: undici-types "~5.26.4" From 56cf6883db9980f7423891fa462e52f7acbbc70f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:55:26 +0900 Subject: [PATCH 013/122] MINOR: [JS] Bump gulp-esbuild from 0.12.0 to 0.12.1 in /js (#43106) Bumps [gulp-esbuild](https://github.com/ym-project/gulp-esbuild) from 0.12.0 to 0.12.1.
Release notes

Sourced from gulp-esbuild's releases.

v0.12.1

Changes

  • deps: update dependencies

Esbuild release notes

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=gulp-esbuild&package-manager=npm_and_yarn&previous-version=0.12.0&new-version=0.12.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 248 ++++++++++++++++++++++++------------------------ 2 files changed, 125 insertions(+), 125 deletions(-) diff --git a/js/package.json b/js/package.json index ac49ec162549b..4f7c8f27cf3f8 100644 --- a/js/package.json +++ b/js/package.json @@ -88,7 +88,7 @@ "gulp": "4.0.2", "glob": "10.4.1", "google-closure-compiler": "20240317.0.0", - "gulp-esbuild": "0.12.0", + "gulp-esbuild": "0.12.1", "gulp-json-transform": "0.5.0", "gulp-rename": "2.0.0", "gulp-replace": "1.1.4", diff --git a/js/yarn.lock b/js/yarn.lock index d98281537670b..a4c9bc1089d3d 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -357,180 +357,180 @@ resolved "https://registry.yarnpkg.com/@discoveryjs/json-ext/-/json-ext-0.5.7.tgz#1d572bfbbe14b7704e0ba0f39b74815b84870d70" integrity sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw== -"@esbuild/aix-ppc64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz#d1bc06aedb6936b3b6d313bf809a5a40387d2b7f" - integrity sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA== +"@esbuild/aix-ppc64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f" + integrity sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ== "@esbuild/aix-ppc64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.22.0.tgz#6ff1ec509335ffbaee3fc4a5a11373d6f029b2c4" integrity sha512-uvQR2crZ/zgzSHDvdygHyNI+ze9zwS8mqz0YtGXotSqvEE0UkYE9s+FZKQNTt1VtT719mfP3vHrUdCpxBNQZhQ== -"@esbuild/android-arm64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz#7ad65a36cfdb7e0d429c353e00f680d737c2aed4" - integrity sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA== +"@esbuild/android-arm64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz#09d9b4357780da9ea3a7dfb833a1f1ff439b4052" + integrity sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A== "@esbuild/android-arm64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.22.0.tgz#a02ef8650fe5ce17807c9f3229a36d326d2b07ea" integrity sha512-UKhPb3o2gAB/bfXcl58ZXTn1q2oVu1rEu/bKrCtmm+Nj5MKUbrOwR5WAixE2v+lk0amWuwPvhnPpBRLIGiq7ig== -"@esbuild/android-arm@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.19.12.tgz#b0c26536f37776162ca8bde25e42040c203f2824" - integrity sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w== +"@esbuild/android-arm@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.21.5.tgz#9b04384fb771926dfa6d7ad04324ecb2ab9b2e28" + integrity sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg== "@esbuild/android-arm@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.22.0.tgz#dd26ec407db736eee0eb060195a43aa13f618013" integrity 
sha512-PBnyP+r8vJE4ifxsWys9l+Mc2UY/yYZOpX82eoyGISXXb3dRr0M21v+s4fgRKWMFPMSf/iyowqPW/u7ScSUkjQ== -"@esbuild/android-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.19.12.tgz#cb13e2211282012194d89bf3bfe7721273473b3d" - integrity sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew== +"@esbuild/android-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.21.5.tgz#29918ec2db754cedcb6c1b04de8cd6547af6461e" + integrity sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA== "@esbuild/android-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.22.0.tgz#f02771a20be264ccc22478dcc7de8f2bde858af8" integrity sha512-IjTYtvIrjhR41Ijy2dDPgYjQHWG/x/A4KXYbs1fiU3efpRdoxMChK3oEZV6GPzVEzJqxFgcuBaiX1kwEvWUxSw== -"@esbuild/darwin-arm64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz#cbee41e988020d4b516e9d9e44dd29200996275e" - integrity sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g== +"@esbuild/darwin-arm64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz#e495b539660e51690f3928af50a76fb0a6ccff2a" + integrity sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ== "@esbuild/darwin-arm64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.22.0.tgz#d905f2b951aeba328dd02e3a09f86b5d4e5e6741" integrity sha512-mqt+Go4y9wRvEz81bhKd9RpHsQR1LwU8Xm6jZRUV/xpM7cIQFbFH6wBCLPTNsdELBvfoHeumud7X78jQQJv2TA== -"@esbuild/darwin-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz#e37d9633246d52aecf491ee916ece709f9d5f4cd" - integrity sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A== +"@esbuild/darwin-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz#c13838fa57372839abdddc91d71542ceea2e1e22" + integrity sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw== "@esbuild/darwin-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.22.0.tgz#d07b4fe501fe9985590285b2790039ed4743f86e" integrity sha512-vTaTQ9OgYc3VTaWtOE5pSuDT6H3d/qSRFRfSBbnxFfzAvYoB3pqKXA0LEbi/oT8GUOEAutspfRMqPj2ezdFaMw== -"@esbuild/freebsd-arm64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz#1ee4d8b682ed363b08af74d1ea2b2b4dbba76487" - integrity sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA== +"@esbuild/freebsd-arm64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz#646b989aa20bf89fd071dd5dbfad69a3542e550e" + integrity sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g== "@esbuild/freebsd-arm64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.22.0.tgz#4251e0a14716116f4fa7e22d908f47408b6c2fb5" integrity sha512-0e1ZgoobJzaGnR4reD7I9rYZ7ttqdh1KPvJWnquUoDJhL0rYwdneeLailBzd2/4g/U5p4e5TIHEWa68NF2hFpQ== 
-"@esbuild/freebsd-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz#37a693553d42ff77cd7126764b535fb6cc28a11c" - integrity sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg== +"@esbuild/freebsd-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz#aa615cfc80af954d3458906e38ca22c18cf5c261" + integrity sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ== "@esbuild/freebsd-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.22.0.tgz#7dbd35616a71f8a9b61a9435c5a79d87fc0b2f1a" integrity sha512-BFgyYwlCwRWyPQJtkzqq2p6pJbiiWgp0P9PNf7a5FQ1itKY4czPuOMAlFVItirSmEpRPCeImuwePNScZS0pL5Q== -"@esbuild/linux-arm64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz#be9b145985ec6c57470e0e051d887b09dddb2d4b" - integrity sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA== +"@esbuild/linux-arm64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz#70ac6fa14f5cb7e1f7f887bcffb680ad09922b5b" + integrity sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q== "@esbuild/linux-arm64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.22.0.tgz#77cded446dd0c3b723d272e0243b3d9ddb3cb46e" integrity sha512-V/K2rctCUgC0PCXpN7AqT4hoazXKgIYugFGu/myk2+pfe6jTW2guz/TBwq4cZ7ESqusR/IzkcQaBkcjquuBWsw== -"@esbuild/linux-arm@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz#207ecd982a8db95f7b5279207d0ff2331acf5eef" - integrity sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w== +"@esbuild/linux-arm@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz#fc6fd11a8aca56c1f6f3894f2bea0479f8f626b9" + integrity sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA== "@esbuild/linux-arm@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.22.0.tgz#6587d3e423e09766ea997229827e292e7c4acd6f" integrity sha512-KEMWiA9aGuPUD4BH5yjlhElLgaRXe+Eri6gKBoDazoPBTo1BXc/e6IW5FcJO9DoL19FBeCxgONyh95hLDNepIg== -"@esbuild/linux-ia32@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz#d0d86b5ca1562523dc284a6723293a52d5860601" - integrity sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA== +"@esbuild/linux-ia32@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz#3271f53b3f93e3d093d518d1649d6d68d346ede2" + integrity sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg== "@esbuild/linux-ia32@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.22.0.tgz#2d06d7b4abc443e05a820ff50d4c2d98cc04c22f" integrity sha512-r2ZZqkOMOrpUhzNwxI7uLAHIDwkfeqmTnrv1cjpL/rjllPWszgqmprd/om9oviKXUBpMqHbXmppvjAYgISb26Q== -"@esbuild/linux-loong64@0.19.12": - version "0.19.12" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz#9a37f87fec4b8408e682b528391fa22afd952299" - integrity sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA== +"@esbuild/linux-loong64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz#ed62e04238c57026aea831c5a130b73c0f9f26df" + integrity sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg== "@esbuild/linux-loong64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.22.0.tgz#a3e7faabe9a046ac4557bc515ce0981cfe5a6e0f" integrity sha512-qaowLrV/YOMAL2RfKQ4C/VaDzAuLDuylM2sd/LH+4OFirMl6CuDpRlCq4u49ZBaVV8pkI/Y+hTdiibvQRhojCA== -"@esbuild/linux-mips64el@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz#4ddebd4e6eeba20b509d8e74c8e30d8ace0b89ec" - integrity sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w== +"@esbuild/linux-mips64el@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz#e79b8eb48bf3b106fadec1ac8240fb97b4e64cbe" + integrity sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg== "@esbuild/linux-mips64el@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.22.0.tgz#3a2877a78f6719e5eed4cfdded5121c5ab9305a4" integrity sha512-hgrezzjQTRxjkQ5k08J6rtZN5PNnkWx/Rz6Kmj9gnsdCAX1I4Dn4ZPqvFRkXo55Q3pnVQJBwbdtrTO7tMGtyVA== -"@esbuild/linux-ppc64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz#adb67dadb73656849f63cd522f5ecb351dd8dee8" - integrity sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg== +"@esbuild/linux-ppc64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz#5f2203860a143b9919d383ef7573521fb154c3e4" + integrity sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w== "@esbuild/linux-ppc64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.22.0.tgz#6609478066083e05cc1854a8b272daf62a7e944b" integrity sha512-ewxg6FLLUio883XgSjfULEmDl3VPv/TYNnRprVAS3QeGFLdCYdx1tIudBcd7n9jIdk82v1Ajov4jx87qW7h9+g== -"@esbuild/linux-riscv64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz#11bc0698bf0a2abf8727f1c7ace2112612c15adf" - integrity sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg== +"@esbuild/linux-riscv64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz#07bcafd99322d5af62f618cb9e6a9b7f4bb825dc" + integrity sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA== "@esbuild/linux-riscv64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.22.0.tgz#d786a89903cf98e8d34befe6a71c69562bb4ceac" integrity sha512-Az5XbgSJC2lE8XK8pdcutsf9RgdafWdTpUK/+6uaDdfkviw/B4JCwAfh1qVeRWwOohwdsl4ywZrWBNWxwrPLFg== -"@esbuild/linux-s390x@0.19.12": - version "0.19.12" - resolved 
"https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz#e86fb8ffba7c5c92ba91fc3b27ed5a70196c3cc8" - integrity sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg== +"@esbuild/linux-s390x@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz#b7ccf686751d6a3e44b8627ababc8be3ef62d8de" + integrity sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A== "@esbuild/linux-s390x@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.22.0.tgz#a7ab13ae163307ac615dac5ce7f60a6b0a067d59" integrity sha512-8j4a2ChT9+V34NNNY9c/gMldutaJFmfMacTPq4KfNKwv2fitBCLYjee7c+Vxaha2nUhPK7cXcZpJtJ3+Y7ZdVQ== -"@esbuild/linux-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz#5f37cfdc705aea687dfe5dfbec086a05acfe9c78" - integrity sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg== +"@esbuild/linux-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz#6d8f0c768e070e64309af8004bb94e68ab2bb3b0" + integrity sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ== "@esbuild/linux-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.22.0.tgz#24949de431013354da1d8c29e53299798f8c27ef" integrity sha512-JUQyOnpbAkkRFOk/AhsEemz5TfWN4FJZxVObUlnlNCbe7QBl61ZNfM4cwBXayQA6laMJMUcqLHaYQHAB6YQ95Q== -"@esbuild/netbsd-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz#29da566a75324e0d0dd7e47519ba2f7ef168657b" - integrity sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA== +"@esbuild/netbsd-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz#bbe430f60d378ecb88decb219c602667387a6047" + integrity sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg== "@esbuild/netbsd-x64@0.22.0": version "0.22.0" @@ -542,50 +542,50 @@ resolved "https://registry.yarnpkg.com/@esbuild/openbsd-arm64/-/openbsd-arm64-0.22.0.tgz#7cb42e3a0d3da039d1a4b7ccbd0c19b0f71ae453" integrity sha512-Ezlhu/YyITmXwKSB+Zu/QqD7cxrjrpiw85cc0Rbd3AWr2wsgp+dWbWOE8MqHaLW9NKMZvuL0DhbJbvzR7F6Zvg== -"@esbuild/openbsd-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz#306c0acbdb5a99c95be98bdd1d47c916e7dc3ff0" - integrity sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw== +"@esbuild/openbsd-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz#99d1cf2937279560d2104821f5ccce220cb2af70" + integrity sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow== "@esbuild/openbsd-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.22.0.tgz#194aa9915323962e9ea66c5a13ff3e1db272a683" integrity sha512-ufjdW5tFJGUjlH9j/5cCE9lrwRffyZh+T4vYvoDKoYsC6IXbwaFeV/ENxeNXcxotF0P8CDzoICXVSbJaGBhkrw== -"@esbuild/sunos-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz#0933eaab9af8b9b2c930236f62aae3fc593faf30" 
- integrity sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA== +"@esbuild/sunos-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz#08741512c10d529566baba837b4fe052c8f3487b" + integrity sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg== "@esbuild/sunos-x64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.22.0.tgz#2be9d2459ae181ebedb6470e4469349a27c4f060" integrity sha512-zY6ly/AoSmKnmNTowDJsK5ehra153/5ZhqxNLfq9NRsTTltetr+yHHcQ4RW7QDqw4JC8A1uC1YmeSfK9NRcK1w== -"@esbuild/win32-arm64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz#773bdbaa1971b36db2f6560088639ccd1e6773ae" - integrity sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A== +"@esbuild/win32-arm64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz#675b7385398411240735016144ab2e99a60fc75d" + integrity sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A== "@esbuild/win32-arm64@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.22.0.tgz#6b4224f2d049c26f37026904210a4293e34c2747" integrity sha512-Kml5F7tv/1Maam0pbbCrvkk9vj046dPej30kFzlhXnhuCtYYBP6FGy/cLbc5yUT1lkZznGLf2OvuvmLjscO5rw== -"@esbuild/win32-ia32@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz#000516cad06354cc84a73f0943a4aa690ef6fd67" - integrity sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ== +"@esbuild/win32-ia32@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz#1bfc3ce98aa6ca9a0969e4d2af72144c59c1193b" + integrity sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA== "@esbuild/win32-ia32@0.22.0": version "0.22.0" resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.22.0.tgz#4a1184f6fd4a7594c4f1e68b1e649248534f7832" integrity sha512-IOgwn+mYTM3RrcydP4Og5IpXh+ftN8oF+HELTXSmbWBlujuci4Qa3DTeO+LEErceisI7KUSfEIiX+WOUlpELkw== -"@esbuild/win32-x64@0.19.12": - version "0.19.12" - resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz#c57c8afbb4054a3ab8317591a0b7320360b444ae" - integrity sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA== +"@esbuild/win32-x64@0.21.5": + version "0.21.5" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz#acad351d582d157bb145535db2a6ff53dd514b5c" + integrity sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw== "@esbuild/win32-x64@0.22.0": version "0.22.0" @@ -2987,34 +2987,34 @@ esbuild@0.22.0: "@esbuild/win32-ia32" "0.22.0" "@esbuild/win32-x64" "0.22.0" -esbuild@^0.19.6: - version "0.19.12" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.19.12.tgz#dc82ee5dc79e82f5a5c3b4323a2a641827db3e04" - integrity sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg== +esbuild@^0.21.5: + version "0.21.5" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.21.5.tgz#9ca301b120922959b766360d8ac830da0d02997d" + integrity 
sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw== optionalDependencies: - "@esbuild/aix-ppc64" "0.19.12" - "@esbuild/android-arm" "0.19.12" - "@esbuild/android-arm64" "0.19.12" - "@esbuild/android-x64" "0.19.12" - "@esbuild/darwin-arm64" "0.19.12" - "@esbuild/darwin-x64" "0.19.12" - "@esbuild/freebsd-arm64" "0.19.12" - "@esbuild/freebsd-x64" "0.19.12" - "@esbuild/linux-arm" "0.19.12" - "@esbuild/linux-arm64" "0.19.12" - "@esbuild/linux-ia32" "0.19.12" - "@esbuild/linux-loong64" "0.19.12" - "@esbuild/linux-mips64el" "0.19.12" - "@esbuild/linux-ppc64" "0.19.12" - "@esbuild/linux-riscv64" "0.19.12" - "@esbuild/linux-s390x" "0.19.12" - "@esbuild/linux-x64" "0.19.12" - "@esbuild/netbsd-x64" "0.19.12" - "@esbuild/openbsd-x64" "0.19.12" - "@esbuild/sunos-x64" "0.19.12" - "@esbuild/win32-arm64" "0.19.12" - "@esbuild/win32-ia32" "0.19.12" - "@esbuild/win32-x64" "0.19.12" + "@esbuild/aix-ppc64" "0.21.5" + "@esbuild/android-arm" "0.21.5" + "@esbuild/android-arm64" "0.21.5" + "@esbuild/android-x64" "0.21.5" + "@esbuild/darwin-arm64" "0.21.5" + "@esbuild/darwin-x64" "0.21.5" + "@esbuild/freebsd-arm64" "0.21.5" + "@esbuild/freebsd-x64" "0.21.5" + "@esbuild/linux-arm" "0.21.5" + "@esbuild/linux-arm64" "0.21.5" + "@esbuild/linux-ia32" "0.21.5" + "@esbuild/linux-loong64" "0.21.5" + "@esbuild/linux-mips64el" "0.21.5" + "@esbuild/linux-ppc64" "0.21.5" + "@esbuild/linux-riscv64" "0.21.5" + "@esbuild/linux-s390x" "0.21.5" + "@esbuild/linux-x64" "0.21.5" + "@esbuild/netbsd-x64" "0.21.5" + "@esbuild/openbsd-x64" "0.21.5" + "@esbuild/sunos-x64" "0.21.5" + "@esbuild/win32-arm64" "0.21.5" + "@esbuild/win32-ia32" "0.21.5" + "@esbuild/win32-x64" "0.21.5" escalade@^3.1.1: version "3.1.2" @@ -3841,12 +3841,12 @@ gulp-cli@^2.2.0: v8flags "^3.2.0" yargs "^7.1.0" -gulp-esbuild@0.12.0: - version "0.12.0" - resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.12.0.tgz#f9967f7a2f208457cc8b64cbdf404fc4704f449a" - integrity sha512-6el2YFJK+Wiip18G4iMl1rNuetSxpEZTLT1e6GuAsi3Q/yaeoNUgTX7nlwpbFkymGXMI4NFXisg5++PMe+fNNA== +gulp-esbuild@0.12.1: + version "0.12.1" + resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.12.1.tgz#f91093f0f68e739f455530804aa533577ec6dfc6" + integrity sha512-dkcN2AHtXTVu+KNw0Zw8SWysziNwpYg6kw41E8frUkil5ZtwktIsot/OCLEpRT6clFpVQ7Hw3+YZQvoNdyTF1A== dependencies: - esbuild "^0.19.6" + esbuild "^0.21.5" plugin-error "^2.0.1" vinyl "^3.0.0" From fd6bf5a532245ae80908d52b90d5462f9b2f9882 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:31:25 +0900 Subject: [PATCH 014/122] MINOR: [JS] Bump tslib from 2.6.2 to 2.6.3 in /js (#43105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [tslib](https://github.com/Microsoft/tslib) from 2.6.2 to 2.6.3.
Release notes

Sourced from tslib's releases.

v2.6.3

What's Changed

Full Changelog: https://github.com/microsoft/tslib/compare/v2.6.2...v2.6.3

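For context on the dependency itself: tslib hosts the runtime helpers that the TypeScript compiler emits for down-level output (`__extends`, `__awaiter`, `__spreadArray`, and so on), so a project imports each helper once instead of inlining a copy into every compiled file; builds opt in through the `importHelpers` compiler option. Below is a minimal sketch of source whose compiled output relies on such a helper. The class names are invented for illustration and are not from this repository, and the compiler options in the comment are an assumed generic configuration, not Arrow's actual one.

```typescript
// Assuming a generic config along the lines of:
//   { "compilerOptions": { "target": "ES5", "module": "ESNext", "importHelpers": true } }
// the compiled module imports __extends from "tslib" instead of inlining it.
class Vector {
  constructor(public length: number) {}
}

// `extends` is what triggers the __extends helper in ES5 output.
class Int32Vector extends Vector {
  constructor(length: number) {
    super(length);
  }
}

console.log(new Int32Vector(3) instanceof Vector); // true
```

A patch bump like this one only swaps those helper implementations, which is consistent with the diff below touching nothing but `js/yarn.lock`.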
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=tslib&package-manager=npm_and_yarn&previous-version=2.6.2&new-version=2.6.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index a4c9bc1089d3d..f8dcd3e5b9eb2 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -6984,9 +6984,9 @@ ts-node@10.9.2: yn "3.1.1" tslib@^2.0.0, tslib@^2.1.0, tslib@^2.4.0, tslib@^2.6.2: - version "2.6.2" - resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" - integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== + version "2.6.3" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.3.tgz#0438f810ad7a9edcde7a241c3d80db693c8cbfe0" + integrity sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ== type-check@^0.4.0, type-check@~0.4.0: version "0.4.0" From beaa47b4d22301988b71ec781d0307652d0451e1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 3 Jul 2024 19:42:37 +0900 Subject: [PATCH 015/122] GH-43122: [CI][Packaging][RPM][CentOS] Use vault.centos.org for SCL (#43127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We can't use http://mirrorlist.centos.org because CentOS 7 reached EOL. ### What changes are included in this PR? Use https://vault.centos.org/ instead. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #43122 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/release/verify-yum.sh | 5 +++++ .../linux-packages/apache-arrow/yum/centos-7/Dockerfile | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index aad82af21fc96..31ed9c160b48e 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -201,6 +201,11 @@ echo "::group::Test Apache Arrow C++" ${install_command} ${enablerepo_epel} arrow-devel-${package_version} if [ -n "${devtoolset}" ]; then ${install_command} ${scl_package} + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo fi ${install_command} \ ${cmake_package} \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile index 8c029d87c2493..e834e17a4bb30 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -24,7 +24,7 @@ ENV \ ARG DEBUG # GH-42128 -# Switch repos to point to to vault.centos.org because Centos 7 is EOL +# Switch repos to point to to vault.centos.org because CentOS 7 is EOL RUN sed -i \ -e 's/^mirrorlist/#mirrorlist/' \ -e 's/^#baseurl/baseurl/' \ @@ -37,6 +37,11 @@ RUN \ yum install -y ${quiet} \ centos-release-scl-rh \ epel-release && \ + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo && \ yum install -y ${quiet} \ ${SCL}-gcc-c++ \ ${SCL}-make \ From 127ee853ac5651567447d2b19c7fd2fa69457997 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 05:19:37 -0700 Subject: [PATCH 016/122] MINOR: [C#] Bump Google.Protobuf from 3.27.1 to 3.27.2 in /csharp (#43099) Bumps 
[Google.Protobuf](https://github.com/protocolbuffers/protobuf) from 3.27.1 to 3.27.2.
Commits
  • 63def39 Updating version.json and repo version numbers to: 27.2
  • 19bd211 Port windows bootstrapping fix (#17225)
  • 4923b8d Fix string_type bugs in edition 2023 (#17211)
  • b0a3c23 Merge pull request #17164 from protocolbuffers/cp-stubs
  • 270ca66 Cleanup imports and comments in V3 stubs.
  • 1e360a4 Add stubs for GeneratedMessageV3, RepeatedFieldBuilderV3, SingleFieldBuilderV...
  • 9cfb59b Add simple conformance test that builds the old gencode against the current r...
  • 6c6f514 Merge pull request #17161 from protocolbuffers/backport-java
  • c7a006a Fix checking unknown field set empty which wasn't exposed yet in 27.x
  • 2426a02 Reserialize all unresolved features using java features from the generated po...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Google.Protobuf&package-manager=nuget&previous-version=3.27.1&new-version=3.27.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index be85bd3090e92..5030d37cdb16d 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -5,7 +5,7 @@ - + From e59832fb05dc40a85fa63297c77c8f134c9ac8e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 3 Jul 2024 16:17:38 +0200 Subject: [PATCH 017/122] GH-43119: [CI][Packaging] Update manylinux 2014 CentOS repos that have been deprecated (#43121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Jobs are failing to find mirrorlist.centos.org ### What changes are included in this PR? Updating repos based on solution from: https://github.com/apache/arrow/issues/43119#issuecomment-2203534492 ### Are these changes tested? Via archery ### Are there any user-facing changes? No * GitHub Issue: #43119 Lead-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Co-authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- ci/docker/centos-7-cpp.dockerfile | 14 ++++++++++++++ ci/docker/python-wheel-manylinux.dockerfile | 12 ++++++++++++ 2 files changed, 26 insertions(+) diff --git a/ci/docker/centos-7-cpp.dockerfile b/ci/docker/centos-7-cpp.dockerfile index 8c1893cbbb2ae..1f30eed694e4e 100644 --- a/ci/docker/centos-7-cpp.dockerfile +++ b/ci/docker/centos-7-cpp.dockerfile @@ -17,11 +17,25 @@ FROM centos:centos7 +# Update mirrors to use vault.centos.org as CentOS 7 +# is EOL since 2024-06-30 +RUN sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/*.repo + # devtoolset is required for C++17 RUN \ yum install -y \ centos-release-scl \ epel-release && \ + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/^# baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/CentOS-SCLo-scl*.repo && \ yum install -y \ cmake3 \ curl \ diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 68d4b27d2ca96..cb39667af1e10 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -25,6 +25,18 @@ ARG manylinux ENV MANYLINUX_VERSION=${manylinux} # Ensure dnf is installed, especially for the manylinux2014 base +RUN if [ "${MANYLINUX_VERSION}" = "2014" ]; then \ + sed -i \ + -e 's/^mirrorlist/#mirrorlist/' \ + -e 's/^#baseurl/baseurl/' \ + -e 's/mirror\.centos\.org/vault.centos.org/' \ + /etc/yum.repos.d/*.repo; \ + if [ "${arch}" != "amd64" ]; then \ + sed -i \ + -e 's,vault\.centos\.org/centos,vault.centos.org/altarch,' \ + /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \ + fi; \ + fi RUN yum install -y dnf # Install basic dependencies From b044a51f410e457592394e25324a5ce6f2276c9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 11:19:21 -0400 Subject: [PATCH 018/122] MINOR: [JS] Bump eslint-plugin-unicorn from 53.0.0 to 54.0.0 in /js (#43101) Bumps [eslint-plugin-unicorn](https://github.com/sindresorhus/eslint-plugin-unicorn) from 53.0.0 to 54.0.0.
Release notes

Sourced from eslint-plugin-unicorn's releases.

v54.0.0

New rules

Breaking

Improvements

  • Add name to flat configs (#2377) ac8536e
  • prefer-array-some: Check Array#{findIndex,findLastIndex}() (#2370) 10568ab (see the sketch after these notes)
  • prefer-includes: Check .lastIndexOf() (#2368) d812ad1
  • prefer-string-raw: Ignore strings in Enums (#2354) 175ea04

Fixes

  • Fix edge cases when add new to calls (#2352) d8f8161

https://github.com/sindresorhus/eslint-plugin-unicorn/compare/v53.0.0...v54.0.0

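Two of the improvements above are the ones this PR actually exercises in Arrow's sources: the `js/src/row/struct.ts` hunk below rewrites a `findIndex(...) !== -1` existence check into `some(...)`. Here is a minimal sketch of the patterns that unicorn 54 now reports, using made-up field data rather than Arrow's real types:

```typescript
interface Field {
  name: string;
}

// Hypothetical stand-in for a schema's child fields.
const children: Field[] = [{ name: "id" }, { name: "value" }];

// prefer-array-some: flags findIndex()/findLastIndex() compared against -1
// when only existence matters...
const hasKeyBefore = children.findIndex((f) => f.name === "id") !== -1;
// ...and suggests Array#some(), which states the intent directly.
const hasKeyAfter = children.some((f) => f.name === "id");

// prefer-includes: now also flags lastIndexOf() used as an existence test...
const tags = ["a", "b", "c"];
const hasTagBefore = tags.lastIndexOf("b") !== -1;
// ...in favor of Array#includes().
const hasTagAfter = tags.includes("b");

console.log(hasKeyBefore === hasKeyAfter, hasTagBefore === hasTagAfter); // true true
```

Both rewrites are behavior-preserving, which is why the lint bump could land together with the source change in one PR.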
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=eslint-plugin-unicorn&package-manager=npm_and_yarn&previous-version=53.0.0&new-version=54.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dominik Moritz --- js/package.json | 2 +- js/src/row/struct.ts | 4 ++-- js/yarn.lock | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/js/package.json b/js/package.json index 4f7c8f27cf3f8..cc8fc6f9b4708 100644 --- a/js/package.json +++ b/js/package.json @@ -83,7 +83,7 @@ "esbuild-plugin-alias": "0.2.1", "eslint": "8.57.0", "eslint-plugin-jest": "28.5.0", - "eslint-plugin-unicorn": "53.0.0", + "eslint-plugin-unicorn": "54.0.0", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", "gulp": "4.0.2", "glob": "10.4.1", diff --git a/js/src/row/struct.ts b/js/src/row/struct.ts index 094f6a4b11ac8..bc3869cb8d08f 100644 --- a/js/src/row/struct.ts +++ b/js/src/row/struct.ts @@ -124,10 +124,10 @@ class StructRowProxyHandler implements ProxyHandler f.name); } has(row: StructRow, key: string) { - return row[kParent].type.children.findIndex((f) => f.name === key) !== -1; + return row[kParent].type.children.some((f) => f.name === key); } getOwnPropertyDescriptor(row: StructRow, key: string) { - if (row[kParent].type.children.findIndex((f) => f.name === key) !== -1) { + if (row[kParent].type.children.some((f) => f.name === key)) { return { writable: true, enumerable: true, configurable: true }; } return; diff --git a/js/yarn.lock b/js/yarn.lock index f8dcd3e5b9eb2..cfa45edef4f00 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -3048,10 +3048,10 @@ eslint-plugin-jest@28.5.0: dependencies: "@typescript-eslint/utils" "^6.0.0 || ^7.0.0" -eslint-plugin-unicorn@53.0.0: - version "53.0.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-53.0.0.tgz#df3a5c9ecabeb759e6fd867b2d84198466ac8c4d" - integrity sha512-kuTcNo9IwwUCfyHGwQFOK/HjJAYzbODHN3wP0PgqbW+jbXqpNWxNVpVhj2tO9SixBwuAdmal8rVcWKBxwFnGuw== +eslint-plugin-unicorn@54.0.0: + version "54.0.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-54.0.0.tgz#ce3ea853e8fd7ca2bda2fd6065bf065adb5d8b6d" + integrity sha512-XxYLRiYtAWiAjPv6z4JREby1TAE2byBC7wlh0V4vWDCpccOSU1KovWV//jqPXF6bq3WKxqX9rdjoRQ1EhdmNdQ== dependencies: "@babel/helper-validator-identifier" "^7.24.5" "@eslint-community/eslint-utils" "^4.4.0" From 1e1d64ab7b74bfa65cbd46bdc79f3c2396dcc9f7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:53:30 -0400 Subject: [PATCH 019/122] MINOR: [Java] Bump dep.junit.jupiter.version from 5.10.2 to 5.10.3 in /java (#43109) Bumps `dep.junit.jupiter.version` from 5.10.2 to 5.10.3. Updates `org.junit.jupiter:junit-jupiter-engine` from 5.10.2 to 5.10.3
Release notes

Sourced from org.junit.jupiter:junit-jupiter-engine's releases.

JUnit 5.10.3 = Platform 1.10.3 + Jupiter 5.10.3 + Vintage 5.10.3

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.10.2...r5.10.3

Commits
  • 55d1232 Release 5.10.3
  • 4455093 Remove accidental heading
  • b86a681 Remove JDK 21 as it's no longer available via Oracle's setup-java step
  • bc1608c Clean up release notes
  • 316ed31 Use Liberica for JDK 8 for compatibility with Apple Silicon
  • ad7c5dd Update copyright headers to 2024
  • 6c663b1 Use same default seed for method and class ordering (#3821)
  • d29e3eb Fix NPE when deserializing TestIdentifier (#3820)
  • f936c01 Fix class-level execution conditions on GraalVM (#3785)
  • 76b7c05 Allow GraalVmStarterTests to run remotely on Test Distribution agents
  • Additional commits viewable in compare view

Updates `org.junit.jupiter:junit-jupiter-api` from 5.10.2 to 5.10.3
Release notes

Sourced from org.junit.jupiter:junit-jupiter-api's releases.

JUnit 5.10.3 = Platform 1.10.3 + Jupiter 5.10.3 + Vintage 5.10.3

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.10.2...r5.10.3

Commits
  • 55d1232 Release 5.10.3
  • 4455093 Remove accidental heading
  • b86a681 Remove JDK 21 as it's no longer available via Oracle's setup-java step
  • bc1608c Clean up release notes
  • 316ed31 Use Liberica for JDK 8 for compatibility with Apple Silicon
  • ad7c5dd Update copyright headers to 2024
  • 6c663b1 Use same default seed for method and class ordering (#3821)
  • d29e3eb Fix NPE when deserializing TestIdentifier (#3820)
  • f936c01 Fix class-level execution conditions on GraalVM (#3785)
  • 76b7c05 Allow GraalVmStarterTests to run remotely on Test Distribution agents
  • Additional commits viewable in compare view

Updates `org.junit.jupiter:junit-jupiter-params` from 5.10.2 to 5.10.3
Release notes

Sourced from org.junit.jupiter:junit-jupiter-params's releases.

JUnit 5.10.3 = Platform 1.10.3 + Jupiter 5.10.3 + Vintage 5.10.3

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.10.2...r5.10.3

Commits
  • 55d1232 Release 5.10.3
  • 4455093 Remove accidental heading
  • b86a681 Remove JDK 21 as it's no longer available via Oracle's setup-java step
  • bc1608c Clean up release notes
  • 316ed31 Use Liberica for JDK 8 for compatibility with Apple Silicon
  • ad7c5dd Update copyright headers to 2024
  • 6c663b1 Use same default seed for method and class ordering (#3821)
  • d29e3eb Fix NPE when deserializing TestIdentifier (#3820)
  • f936c01 Fix class-level execution conditions on GraalVM (#3785)
  • 76b7c05 Allow GraalVmStarterTests to run remotely on Test Distribution agents
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Dane Pitkin --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index ace7a1e1f173e..b047aa5d78374 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -88,7 +88,7 @@ under the License. ${project.build.directory}/generated-sources 1.9.0 - 5.10.2 + 5.10.3 2.0.13 33.2.1-jre 4.1.110.Final From e0c364b1e52fa2acc2e1e818c85c1bf6a977254f Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Wed, 3 Jul 2024 18:28:32 -0400 Subject: [PATCH 020/122] MINOR: Add laurentgo to contributors list (#43140) ### Rationale for this change @ laurentgo has been making significant contributions to Java's build system. This will allow them to run crossbow via GH actions for a faster test cycle. ### What changes are included in this PR? * Add @ laurentgo to .asf.yaml ### Are these changes tested? n/a ### Are there any user-facing changes? No. Authored-by: Dane Pitkin Signed-off-by: David Li --- .asf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.asf.yaml b/.asf.yaml index a1c6434587703..28d280e477106 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -24,6 +24,7 @@ github: - davisusanibar - jbonofre - js8544 + - laurentgo - vibhatha - zanmato1984 - ZhangHuiGui From eadeb743151f5b1ded3d04d9a65185ff872d288e Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 4 Jul 2024 17:27:30 +1200 Subject: [PATCH 021/122] GH-43070: [C++][Parquet] Check for valid ciphertext length to prevent segfault (#43071) ### Rationale for this change See #43070 ### What changes are included in this PR? Checks that the ciphertext length is at least enough to hold the length (if written), nonce and GCM tag for the GCM cipher type. Also enforces that the input ciphertext length parameter is provided (is > 0) and verifies that the ciphertext size read from the file isn't going to cause reads beyond the end of the ciphertext buffer. ### Are these changes tested? Yes I've added new unit tests for this. ### Are there any user-facing changes? 
No * GitHub Issue: #43070 Authored-by: Adam Reeve Signed-off-by: mwish --- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/column_reader.cc | 5 +- cpp/src/parquet/encryption/encryption.h | 8 + .../parquet/encryption/encryption_internal.cc | 208 ++++++++++++------ .../parquet/encryption/encryption_internal.h | 20 +- .../encryption/encryption_internal_nossl.cc | 14 +- .../encryption/encryption_internal_test.cc | 144 ++++++++++++ .../encryption/internal_file_decryptor.cc | 16 +- .../encryption/internal_file_decryptor.h | 6 +- .../encryption/key_toolkit_internal.cc | 12 +- .../encryption/read_configurations_test.cc | 1 + cpp/src/parquet/thrift_internal.h | 16 +- 12 files changed, 349 insertions(+), 102 deletions(-) create mode 100644 cpp/src/parquet/encryption/encryption_internal_test.cc diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index dc80f08e72cfe..17574261d891d 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -408,6 +408,7 @@ add_parquet_test(arrow-internals-test SOURCES arrow/path_internal_test.cc if(PARQUET_REQUIRE_ENCRYPTION) add_parquet_test(encryption-test SOURCES + encryption/encryption_internal_test.cc encryption/write_configurations_test.cc encryption/read_configurations_test.cc encryption/properties_test.cc diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 52add8f339fc3..ebf9515f27607 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -512,10 +512,11 @@ std::shared_ptr SerializedPageReader::NextPage() { // Decrypt it if we need to if (crypto_ctx_.data_decryptor != nullptr) { PARQUET_THROW_NOT_OK(decryption_buffer_->Resize( - compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(), + crypto_ctx_.data_decryptor->PlaintextLength(compressed_len), /*shrink_to_fit=*/false)); compressed_len = crypto_ctx_.data_decryptor->Decrypt( - page_buffer->data(), compressed_len, decryption_buffer_->mutable_data()); + page_buffer->span_as(), + decryption_buffer_->mutable_span_as()); page_buffer = decryption_buffer_; } diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 8fd7ec8d3d015..1ddef9e8236db 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -89,6 +89,14 @@ inline const uint8_t* str2bytes(const std::string& str) { return reinterpret_cast(cbytes); } +inline ::arrow::util::span str2span(const std::string& str) { + if (str.empty()) { + return {}; + } + + return {reinterpret_cast(str.data()), str.size()}; +} + class PARQUET_EXPORT ColumnEncryptionProperties { public: class PARQUET_EXPORT Builder { diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 465b14793219f..c5d2d1728ba1e 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -31,6 +31,7 @@ #include "parquet/encryption/openssl_internal.h" #include "parquet/exception.h" +using ::arrow::util::span; using parquet::ParquetException; namespace parquet::encryption { @@ -315,8 +316,8 @@ class AesDecryptor::AesDecryptorImpl { ~AesDecryptorImpl() { WipeOut(); } - int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext); + int Decrypt(span ciphertext, span key, + span aad, span plaintext); void WipeOut() { if (nullptr != ctx_) { @@ -325,7 +326,24 @@ class 
AesDecryptor::AesDecryptorImpl { } } - int ciphertext_size_delta() { return ciphertext_size_delta_; } + [[nodiscard]] int PlaintextLength(int ciphertext_len) const { + if (ciphertext_len < ciphertext_size_delta_) { + std::stringstream ss; + ss << "Ciphertext length " << ciphertext_len << " is invalid, expected at least " + << ciphertext_size_delta_; + throw ParquetException(ss.str()); + } + return ciphertext_len - ciphertext_size_delta_; + } + + [[nodiscard]] int CiphertextLength(int plaintext_len) const { + if (plaintext_len < 0) { + std::stringstream ss; + ss << "Negative plaintext length " << plaintext_len; + throw ParquetException(ss.str()); + } + return plaintext_len + ciphertext_size_delta_; + } private: EVP_CIPHER_CTX* ctx_; @@ -333,17 +351,21 @@ class AesDecryptor::AesDecryptorImpl { int key_length_; int ciphertext_size_delta_; int length_buffer_length_; - int GcmDecrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext); - int CtrDecrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key, - int key_len, uint8_t* plaintext); + /// Get the actual ciphertext length, inclusive of the length buffer length, + /// and validate that the provided buffer size is large enough. + [[nodiscard]] int GetCiphertextLength(span ciphertext) const; + + int GcmDecrypt(span ciphertext, span key, + span aad, span plaintext); + + int CtrDecrypt(span ciphertext, span key, + span plaintext); }; -int AesDecryptor::Decrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { - return impl_->Decrypt(plaintext, plaintext_len, key, key_len, aad, aad_len, ciphertext); +int AesDecryptor::Decrypt(span ciphertext, span key, + span aad, span plaintext) { + return impl_->Decrypt(ciphertext, key, aad, plaintext); } void AesDecryptor::WipeOut() { impl_->WipeOut(); } @@ -438,56 +460,105 @@ std::shared_ptr AesDecryptor::Make( return decryptor; } -int AesDecryptor::CiphertextSizeDelta() { return impl_->ciphertext_size_delta(); } - -int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext, - int ciphertext_len, const uint8_t* key, - int key_len, const uint8_t* aad, - int aad_len, uint8_t* plaintext) { - int len; - int plaintext_len; +int AesDecryptor::PlaintextLength(int ciphertext_len) const { + return impl_->PlaintextLength(ciphertext_len); +} - uint8_t tag[kGcmTagLength]; - memset(tag, 0, kGcmTagLength); - uint8_t nonce[kNonceLength]; - memset(nonce, 0, kNonceLength); +int AesDecryptor::CiphertextLength(int plaintext_len) const { + return impl_->CiphertextLength(plaintext_len); +} +int AesDecryptor::AesDecryptorImpl::GetCiphertextLength( + span ciphertext) const { if (length_buffer_length_ > 0) { + // Note: length_buffer_length_ must be either 0 or kBufferSizeLength + if (ciphertext.size() < static_cast(kBufferSizeLength)) { + std::stringstream ss; + ss << "Ciphertext buffer length " << ciphertext.size() + << " is insufficient to read the ciphertext length." 
+ << " At least " << kBufferSizeLength << " bytes are required."; + throw ParquetException(ss.str()); + } + // Extract ciphertext length int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) | ((ciphertext[2] & 0xff) << 16) | ((ciphertext[1] & 0xff) << 8) | ((ciphertext[0] & 0xff)); - if (ciphertext_len > 0 && - ciphertext_len != (written_ciphertext_len + length_buffer_length_)) { - throw ParquetException("Wrong ciphertext length"); + if (written_ciphertext_len < 0) { + std::stringstream ss; + ss << "Negative ciphertext length " << written_ciphertext_len; + throw ParquetException(ss.str()); + } else if (ciphertext.size() < + static_cast(written_ciphertext_len) + length_buffer_length_) { + std::stringstream ss; + ss << "Serialized ciphertext length " + << (written_ciphertext_len + length_buffer_length_) + << " is greater than the provided ciphertext buffer length " + << ciphertext.size(); + throw ParquetException(ss.str()); } - ciphertext_len = written_ciphertext_len + length_buffer_length_; + + return written_ciphertext_len + length_buffer_length_; } else { - if (ciphertext_len == 0) { - throw ParquetException("Zero ciphertext length"); + if (ciphertext.size() > static_cast(std::numeric_limits::max())) { + std::stringstream ss; + ss << "Ciphertext buffer length " << ciphertext.size() << " overflows int32"; + throw ParquetException(ss.str()); } + return static_cast(ciphertext.size()); + } +} + +int AesDecryptor::AesDecryptorImpl::GcmDecrypt(span ciphertext, + span key, + span aad, + span plaintext) { + int len; + int plaintext_len; + + uint8_t tag[kGcmTagLength]; + memset(tag, 0, kGcmTagLength); + uint8_t nonce[kNonceLength]; + memset(nonce, 0, kNonceLength); + + int ciphertext_len = GetCiphertextLength(ciphertext); + + if (plaintext.size() < static_cast(ciphertext_len) - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext buffer length " << plaintext.size() << " is insufficient " + << "for ciphertext length " << ciphertext_len; + throw ParquetException(ss.str()); + } + + if (ciphertext_len < length_buffer_length_ + kNonceLength + kGcmTagLength) { + std::stringstream ss; + ss << "Invalid ciphertext length " << ciphertext_len << ". 
Expected at least " + << length_buffer_length_ + kNonceLength + kGcmTagLength << "\n"; + throw ParquetException(ss.str()); } // Extracting IV and tag - std::copy(ciphertext + length_buffer_length_, - ciphertext + length_buffer_length_ + kNonceLength, nonce); - std::copy(ciphertext + ciphertext_len - kGcmTagLength, ciphertext + ciphertext_len, - tag); + std::copy(ciphertext.begin() + length_buffer_length_, + ciphertext.begin() + length_buffer_length_ + kNonceLength, nonce); + std::copy(ciphertext.begin() + ciphertext_len - kGcmTagLength, + ciphertext.begin() + ciphertext_len, tag); // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key, nonce)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce)) { throw ParquetException("Couldn't set key and IV"); } // Setting additional authenticated data - if ((nullptr != aad) && (1 != EVP_DecryptUpdate(ctx_, nullptr, &len, aad, aad_len))) { + if ((!aad.empty()) && (1 != EVP_DecryptUpdate(ctx_, nullptr, &len, aad.data(), + static_cast(aad.size())))) { throw ParquetException("Couldn't set AAD"); } // Decryption if (!EVP_DecryptUpdate( - ctx_, plaintext, &len, ciphertext + length_buffer_length_ + kNonceLength, + ctx_, plaintext.data(), &len, + ciphertext.data() + length_buffer_length_ + kNonceLength, ciphertext_len - length_buffer_length_ - kNonceLength - kGcmTagLength)) { throw ParquetException("Failed decryption update"); } @@ -500,7 +571,7 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext, } // Finalization - if (1 != EVP_DecryptFinal_ex(ctx_, plaintext + len, &len)) { + if (1 != EVP_DecryptFinal_ex(ctx_, plaintext.data() + len, &len)) { throw ParquetException("Failed decryption finalization"); } @@ -508,35 +579,34 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(const uint8_t* ciphertext, return plaintext_len; } -int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, - int ciphertext_len, const uint8_t* key, - int key_len, uint8_t* plaintext) { +int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span ciphertext, + span key, + span plaintext) { int len; int plaintext_len; uint8_t iv[kCtrIvLength]; memset(iv, 0, kCtrIvLength); - if (length_buffer_length_ > 0) { - // Extract ciphertext length - int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) | - ((ciphertext[2] & 0xff) << 16) | - ((ciphertext[1] & 0xff) << 8) | ((ciphertext[0] & 0xff)); + int ciphertext_len = GetCiphertextLength(ciphertext); - if (ciphertext_len > 0 && - ciphertext_len != (written_ciphertext_len + length_buffer_length_)) { - throw ParquetException("Wrong ciphertext length"); - } - ciphertext_len = written_ciphertext_len; - } else { - if (ciphertext_len == 0) { - throw ParquetException("Zero ciphertext length"); - } + if (plaintext.size() < static_cast(ciphertext_len) - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext buffer length " << plaintext.size() << " is insufficient " + << "for ciphertext length " << ciphertext_len; + throw ParquetException(ss.str()); + } + + if (ciphertext_len < length_buffer_length_ + kNonceLength) { + std::stringstream ss; + ss << "Invalid ciphertext length " << ciphertext_len << ". 
Expected at least " + << length_buffer_length_ + kNonceLength << "\n"; + throw ParquetException(ss.str()); } // Extracting nonce - std::copy(ciphertext + length_buffer_length_, - ciphertext + length_buffer_length_ + kNonceLength, iv); + std::copy(ciphertext.begin() + length_buffer_length_, + ciphertext.begin() + length_buffer_length_ + kNonceLength, iv); // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial // counter field. // The first 31 bits of the initial counter field are set to 0, the last bit @@ -544,21 +614,21 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, iv[kCtrIvLength - 1] = 1; // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key, iv)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv)) { throw ParquetException("Couldn't set key and IV"); } // Decryption - if (!EVP_DecryptUpdate(ctx_, plaintext, &len, - ciphertext + length_buffer_length_ + kNonceLength, - ciphertext_len - kNonceLength)) { + if (!EVP_DecryptUpdate(ctx_, plaintext.data(), &len, + ciphertext.data() + length_buffer_length_ + kNonceLength, + ciphertext_len - length_buffer_length_ - kNonceLength)) { throw ParquetException("Failed decryption update"); } plaintext_len = len; // Finalization - if (1 != EVP_DecryptFinal_ex(ctx_, plaintext + len, &len)) { + if (1 != EVP_DecryptFinal_ex(ctx_, plaintext.data() + len, &len)) { throw ParquetException("Failed decryption finalization"); } @@ -566,21 +636,21 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(const uint8_t* ciphertext, return plaintext_len; } -int AesDecryptor::AesDecryptorImpl::Decrypt(const uint8_t* ciphertext, int ciphertext_len, - const uint8_t* key, int key_len, - const uint8_t* aad, int aad_len, - uint8_t* plaintext) { - if (key_length_ != key_len) { +int AesDecryptor::AesDecryptorImpl::Decrypt(span ciphertext, + span key, + span aad, + span plaintext) { + if (static_cast(key_length_) != key.size()) { std::stringstream ss; - ss << "Wrong key length " << key_len << ". Should be " << key_length_; + ss << "Wrong key length " << key.size() << ". Should be " << key_length_; throw ParquetException(ss.str()); } if (kGcmMode == aes_mode_) { - return GcmDecrypt(ciphertext, ciphertext_len, key, key_len, aad, aad_len, plaintext); + return GcmDecrypt(ciphertext, key, aad, plaintext); } - return CtrDecrypt(ciphertext, ciphertext_len, key, key_len, plaintext); + return CtrDecrypt(ciphertext, key, plaintext); } static std::string ShortToBytesLe(int16_t input) { diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 1bdf47c56f291..2d5450553c16d 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -21,6 +21,7 @@ #include #include +#include "arrow/util/span.h" #include "parquet/properties.h" #include "parquet/types.h" @@ -44,7 +45,7 @@ constexpr int8_t kBloomFilterHeader = 8; constexpr int8_t kBloomFilterBitset = 9; /// Performs AES encryption operations with GCM or CTR ciphers. -class AesEncryptor { +class PARQUET_EXPORT AesEncryptor { public: /// Can serve one key length only. Possible values: 16, 24, 32 bytes. /// If write_length is true, prepend ciphertext length to the ciphertext @@ -82,7 +83,7 @@ class AesEncryptor { }; /// Performs AES decryption operations with GCM or CTR ciphers. -class AesDecryptor { +class PARQUET_EXPORT AesDecryptor { public: /// Can serve one key length only. Possible values: 16, 24, 32 bytes. 
/// If contains_length is true, expect ciphertext length prepended to the ciphertext @@ -104,13 +105,20 @@ class AesDecryptor { ~AesDecryptor(); void WipeOut(); - /// Size difference between plaintext and ciphertext, for this cipher. - int CiphertextSizeDelta(); + /// The size of the plaintext, for this cipher and the specified ciphertext length. + [[nodiscard]] int PlaintextLength(int ciphertext_len) const; + + /// The size of the ciphertext, for this cipher and the specified plaintext length. + [[nodiscard]] int CiphertextLength(int plaintext_len) const; /// Decrypts ciphertext with the key and aad. Key length is passed only for /// validation. If different from value in constructor, exception will be thrown. - int Decrypt(const uint8_t* ciphertext, int ciphertext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* plaintext); + /// The caller is responsible for ensuring that the plaintext buffer is at least as + /// large as PlaintextLength(ciphertext_len). + int Decrypt(::arrow::util::span ciphertext, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span plaintext); private: // PIMPL Idiom diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index ead868643baae..ed323c4aa6167 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -58,9 +58,10 @@ AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadat class AesDecryptor::AesDecryptorImpl {}; -int AesDecryptor::Decrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { +int AesDecryptor::Decrypt(::arrow::util::span ciphertext, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span plaintext) { ThrowOpenSSLRequiredException(); return -1; } @@ -91,7 +92,12 @@ std::shared_ptr AesDecryptor::Make( return NULLPTR; } -int AesDecryptor::CiphertextSizeDelta() { +int AesDecryptor::PlaintextLength(int ciphertext_len) const { + ThrowOpenSSLRequiredException(); + return -1; +} + +int AesDecryptor::CiphertextLength(int plaintext_len) const { ThrowOpenSSLRequiredException(); return -1; } diff --git a/cpp/src/parquet/encryption/encryption_internal_test.cc b/cpp/src/parquet/encryption/encryption_internal_test.cc new file mode 100644 index 0000000000000..cf7eeef4c6446 --- /dev/null +++ b/cpp/src/parquet/encryption/encryption_internal_test.cc @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include <gtest/gtest.h>
+
+#include "parquet/encryption/encryption_internal.h"
+
+namespace parquet::encryption::test {
+
+class TestAesEncryption : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    key_length_ = 16;
+    key_ = "1234567890123450";
+    aad_ = "abcdefgh";
+    plain_text_ =
+        "Apache Parquet is an open source, column-oriented data file format designed for "
+        "efficient data storage and retrieval";
+  }
+
+  void EncryptionRoundTrip(ParquetCipher::type cipher_type, bool write_length) {
+    bool metadata = false;
+
+    AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length);
+
+    int expected_ciphertext_len =
+        static_cast<int>(plain_text_.size()) + encryptor.CiphertextSizeDelta();
+    std::vector<uint8_t> ciphertext(expected_ciphertext_len, '\0');
+
+    int ciphertext_length =
+        encryptor.Encrypt(str2bytes(plain_text_), static_cast<int>(plain_text_.size()),
+                          str2bytes(key_), static_cast<int>(key_.size()), str2bytes(aad_),
+                          static_cast<int>(aad_.size()), ciphertext.data());
+
+    ASSERT_EQ(ciphertext_length, expected_ciphertext_len);
+
+    AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length);
+
+    int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length);
+    std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0');
+
+    int plaintext_length =
+        decryptor.Decrypt(ciphertext, str2span(key_), str2span(aad_), decrypted_text);
+
+    std::string decrypted_text_str(decrypted_text.begin(), decrypted_text.end());
+
+    ASSERT_EQ(plaintext_length, static_cast<int>(plain_text_.size()));
+    ASSERT_EQ(plaintext_length, expected_plaintext_length);
+    ASSERT_EQ(decrypted_text_str, plain_text_);
+  }
+
+  void DecryptInvalidCiphertext(ParquetCipher::type cipher_type) {
+    bool metadata = false;
+    bool write_length = true;
+
+    AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length);
+
+    // Create ciphertext of all zeros, so the ciphertext length will be read as zero
+    const int ciphertext_length = 100;
+    std::vector<uint8_t> ciphertext(ciphertext_length, '\0');
+
+    int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length);
+    std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0');
+
+    EXPECT_THROW(
+        decryptor.Decrypt(ciphertext, str2span(key_), str2span(aad_), decrypted_text),
+        ParquetException);
+  }
+
+  void DecryptCiphertextBufferTooSmall(ParquetCipher::type cipher_type) {
+    bool metadata = false;
+    bool write_length = true;
+
+    AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length);
+
+    int expected_ciphertext_len =
+        static_cast<int>(plain_text_.size()) + encryptor.CiphertextSizeDelta();
+    std::vector<uint8_t> ciphertext(expected_ciphertext_len, '\0');
+
+    int ciphertext_length =
+        encryptor.Encrypt(str2bytes(plain_text_), static_cast<int>(plain_text_.size()),
+                          str2bytes(key_), static_cast<int>(key_.size()), str2bytes(aad_),
+                          static_cast<int>(aad_.size()), ciphertext.data());
+
+    AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length);
+
+    int expected_plaintext_length = decryptor.PlaintextLength(ciphertext_length);
+    std::vector<uint8_t> decrypted_text(expected_plaintext_length, '\0');
+
+    ::arrow::util::span<const uint8_t> truncated_ciphertext(ciphertext.data(),
+                                                            ciphertext_length - 1);
+    EXPECT_THROW(decryptor.Decrypt(truncated_ciphertext, str2span(key_), str2span(aad_),
+                                   decrypted_text),
+                 ParquetException);
+  }
+
+ private:
+  int key_length_ = 0;
+  std::string key_;
+  std::string aad_;
+  std::string plain_text_;
+};
+
+TEST_F(TestAesEncryption, AesGcmRoundTrip) {
+  EncryptionRoundTrip(ParquetCipher::AES_GCM_V1, /*write_length=*/true);
+  EncryptionRoundTrip(ParquetCipher::AES_GCM_V1, /*write_length=*/false);
+}
+
+TEST_F(TestAesEncryption, AesGcmCtrRoundTrip) {
+  EncryptionRoundTrip(ParquetCipher::AES_GCM_CTR_V1, /*write_length=*/true);
+  EncryptionRoundTrip(ParquetCipher::AES_GCM_CTR_V1, /*write_length=*/false);
+}
+
+TEST_F(TestAesEncryption, AesGcmDecryptInvalidCiphertext) {
+  DecryptInvalidCiphertext(ParquetCipher::AES_GCM_V1);
+}
+
+TEST_F(TestAesEncryption, AesGcmCtrDecryptInvalidCiphertext) {
+  DecryptInvalidCiphertext(ParquetCipher::AES_GCM_CTR_V1);
+}
+
+TEST_F(TestAesEncryption, AesGcmDecryptCiphertextBufferTooSmall) {
+  DecryptCiphertextBufferTooSmall(ParquetCipher::AES_GCM_V1);
+}
+
+TEST_F(TestAesEncryption, AesGcmCtrDecryptCiphertextBufferTooSmall) {
+  DecryptCiphertextBufferTooSmall(ParquetCipher::AES_GCM_CTR_V1);
+}
+
+}  // namespace parquet::encryption::test
diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc
index c4416df90b121..a900a4d2eb094 100644
--- a/cpp/src/parquet/encryption/internal_file_decryptor.cc
+++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc
@@ -33,13 +33,17 @@ Decryptor::Decryptor(std::shared_ptr<AesDecryptor> aes_decryptor,
       aad_(aad),
       pool_(pool) {}

-int Decryptor::CiphertextSizeDelta() { return aes_decryptor_->CiphertextSizeDelta(); }
+int Decryptor::PlaintextLength(int ciphertext_len) const {
+  return aes_decryptor_->PlaintextLength(ciphertext_len);
+}
+
+int Decryptor::CiphertextLength(int plaintext_len) const {
+  return aes_decryptor_->CiphertextLength(plaintext_len);
+}

-int Decryptor::Decrypt(const uint8_t* ciphertext, int ciphertext_len,
-                       uint8_t* plaintext) {
-  return aes_decryptor_->Decrypt(ciphertext, ciphertext_len, str2bytes(key_),
-                                 static_cast<int>(key_.size()), str2bytes(aad_),
-                                 static_cast<int>(aad_.size()), plaintext);
+int Decryptor::Decrypt(::arrow::util::span<const uint8_t> ciphertext,
+                       ::arrow::util::span<uint8_t> plaintext) {
+  return aes_decryptor_->Decrypt(ciphertext, str2span(key_), str2span(aad_), plaintext);
 }

 // InternalFileDecryptor
diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h
index f12cdefbe67a7..8af3587acf884 100644
--- a/cpp/src/parquet/encryption/internal_file_decryptor.h
+++ b/cpp/src/parquet/encryption/internal_file_decryptor.h
@@ -45,8 +45,10 @@ class PARQUET_EXPORT Decryptor {
   void UpdateAad(const std::string& aad) { aad_ = aad; }
   ::arrow::MemoryPool* pool() { return pool_; }

-  int CiphertextSizeDelta();
-  int Decrypt(const uint8_t* ciphertext, int ciphertext_len, uint8_t* plaintext);
+  [[nodiscard]] int PlaintextLength(int ciphertext_len) const;
+  [[nodiscard]] int CiphertextLength(int plaintext_len) const;
+  int Decrypt(::arrow::util::span<const uint8_t> ciphertext,
+              ::arrow::util::span<uint8_t> plaintext);

  private:
   std::shared_ptr<AesDecryptor> aes_decryptor_;
diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc
index bdd65d8de3919..a3c7c996b130a 100644
--- a/cpp/src/parquet/encryption/key_toolkit_internal.cc
+++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc
@@ -55,15 +55,13 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key,
                               false /*contains_length*/);

   int decrypted_key_len =
-      static_cast<int>(encrypted_key.size()) - key_decryptor.CiphertextSizeDelta();
+      key_decryptor.PlaintextLength(static_cast<int>(encrypted_key.size()));
   std::string decrypted_key(decrypted_key_len, '\0');
+  ::arrow::util::span<uint8_t> decrypted_key_span(
+      reinterpret_cast<uint8_t*>(&decrypted_key[0]), decrypted_key_len);

-  decrypted_key_len = key_decryptor.Decrypt(
-      reinterpret_cast<const uint8_t*>(encrypted_key.data()),
-      static_cast<int>(encrypted_key.size()),
-      reinterpret_cast<const uint8_t*>(master_key.data()),
-      static_cast<int>(master_key.size()), reinterpret_cast<const uint8_t*>(aad.data()),
-      static_cast<int>(aad.size()), reinterpret_cast<uint8_t*>(&decrypted_key[0]));
+  decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), str2span(master_key),
+                                            str2span(aad), decrypted_key_span);

   return decrypted_key;
 }
diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc
index 94fb6362269e2..f450f9274c261 100644
--- a/cpp/src/parquet/encryption/read_configurations_test.cc
+++ b/cpp/src/parquet/encryption/read_configurations_test.cc
@@ -22,6 +22,7 @@

 #include "arrow/io/file.h"
 #include "arrow/testing/gtest_compat.h"
+#include "arrow/util/config.h"

 #include "parquet/column_reader.h"
 #include "parquet/column_writer.h"
diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h
index 9e02331b44ba0..4e4d7ed9837df 100644
--- a/cpp/src/parquet/thrift_internal.h
+++ b/cpp/src/parquet/thrift_internal.h
@@ -411,17 +411,21 @@ class ThriftDeserializer {
       // thrift message is encrypted
       uint32_t clen;
       clen = *len;
+      if (clen > static_cast<uint32_t>(std::numeric_limits<int32_t>::max())) {
+        std::stringstream ss;
+        ss << "Cannot decrypt buffer with length " << clen << ", which overflows int32\n";
+        throw ParquetException(ss.str());
+      }
       // decrypt
-      auto decrypted_buffer = std::static_pointer_cast<ResizableBuffer>(
-          AllocateBuffer(decryptor->pool(),
-                         static_cast<int64_t>(clen - decryptor->CiphertextSizeDelta())));
-      const uint8_t* cipher_buf = buf;
+      auto decrypted_buffer = std::static_pointer_cast<ResizableBuffer>(AllocateBuffer(
+          decryptor->pool(), decryptor->PlaintextLength(static_cast<int>(clen))));
+      ::arrow::util::span<const uint8_t> cipher_buf(buf, clen);
       uint32_t decrypted_buffer_len =
-          decryptor->Decrypt(cipher_buf, 0, decrypted_buffer->mutable_data());
+          decryptor->Decrypt(cipher_buf, decrypted_buffer->mutable_span_as<uint8_t>());
       if (decrypted_buffer_len <= 0) {
         throw ParquetException("Couldn't decrypt buffer\n");
       }
-      *len = decrypted_buffer_len + decryptor->CiphertextSizeDelta();
+      *len = decryptor->CiphertextLength(static_cast<int>(decrypted_buffer_len));
       DeserializeUnencryptedMessage(decrypted_buffer->data(), &decrypted_buffer_len,
                                     deserialized_msg);
     }

From 9768c96b7043192f69915274f123ad8bdc720792 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Fri, 5 Jul 2024 01:24:58 +0300
Subject: [PATCH 022/122] GH-43150: [Docs] Correct documentation of pyarrow.compute.microsecond (#43151)

### What changes are included in this PR?

Corrected a small mistake in the docstring.

* GitHub Issue: #43150

Authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Signed-off-by: Rok Mihevc
---
 cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index f49e201492c9b..35b1deb3cda58 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -1741,7 +1741,7 @@ const FunctionDoc millisecond_doc{

 const FunctionDoc microsecond_doc{
     "Extract microsecond values",
-    ("Millisecond returns number of microseconds since the last full millisecond.\n"
+    ("Microsecond returns number of microseconds since the last full millisecond.\n"
      "Null values emit null.\n"
      "An error is returned if the values have a defined timezone but it\n"
      "cannot be found in the timezone database."),

From 3bb988e14fc4635391229696c87aae8f7ddcfa16 Mon Sep 17 00:00:00 2001
From: Hyunseok Seo
Date: Fri, 5 Jul 2024 09:17:34 +0900
Subject: [PATCH 023/122] GH-41623: [Docs] Remove the warning for `arrow::dataset` (#43148)

### Rationale for this change

According to the discussion in [41623](https://github.com/apache/arrow/issues/41623), the warning message indicating that the `arrow::dataset` namespace is experimental is no longer necessary.

### What changes are included in this PR?

Remove the warning message.

### Are these changes tested?

Yes. I have verified the changes by building the documentation.

### Are there any user-facing changes?

Yes. The updated documentation will be visible to users.

* GitHub Issue: #41623

Authored-by: Hyunseok Seo
Signed-off-by: Sutou Kouhei
---
 docs/source/cpp/dataset.rst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/docs/source/cpp/dataset.rst b/docs/source/cpp/dataset.rst
index a64b73b61c05d..79a731cd358cd 100644
--- a/docs/source/cpp/dataset.rst
+++ b/docs/source/cpp/dataset.rst
@@ -27,11 +27,6 @@ Tabular Datasets

 .. seealso:: :doc:`Dataset API reference `

-.. warning::
-
-    The ``arrow::dataset`` namespace is experimental, and a stable API
-    is not yet guaranteed.
-
 The Arrow Datasets library provides functionality to efficiently work with
 tabular, potentially larger than memory, and multi-file datasets. This
 includes:

From 5a28e180f0a078d0b1a970310d5c03ea6a351ffe Mon Sep 17 00:00:00 2001
From: Amir Gonnen
Date: Fri, 5 Jul 2024 07:57:26 +0300
Subject: [PATCH 024/122] GH-43130: [C++][ArrowFlight] Crash due to UCS thread mode (#43120)

When the worker mode is `UCS_THREAD_MODE_SERIALIZED`, UCX crashes due to mpool corruption. This happens when a buffer is deallocated on a different thread: in that case, two threads access the UCX memory pool simultaneously.
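
For context, the sketch below illustrates the UCP worker-creation pattern that the one-line fix in this patch changes: requesting `UCS_THREAD_MODE_MULTI` so that UCX guards its own internal state. This is an illustration only, not code from the patch; the helper function name is hypothetical.

```cpp
#include <cstring>

#include <ucp/api/ucp.h>

// Hypothetical helper: create a worker that is safe to drive from several
// threads. With UCS_THREAD_MODE_SERIALIZED the application promises to
// serialize all access itself; freeing a buffer from another thread breaks
// that promise and corrupts UCX's internal memory pool (mpool).
ucs_status_t CreateMultiThreadedWorker(ucp_context_h context, ucp_worker_h* worker) {
  ucp_worker_params_t params;
  std::memset(&params, 0, sizeof(params));
  params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
  params.thread_mode = UCS_THREAD_MODE_MULTI;  // let UCX protect its own state
  return ucp_worker_create(context, &params, worker);
}
```

Note that UCX may grant a weaker thread mode than requested (for example, when built without multi-thread support), so callers can confirm the granted mode with `ucp_worker_query`.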
See discussion on UCX forum: https://github.com/openucx/ucx/discussions/9987 * GitHub Issue: #43130 Authored-by: Amir Gonnen Signed-off-by: David Li --- cpp/src/arrow/flight/transport/ucx/ucx_client.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc index 32c2fd776f32b..946ac2d176203 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc +++ b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc @@ -97,7 +97,7 @@ class ClientConnection { ucp_worker_params_t worker_params; std::memset(&worker_params, 0, sizeof(worker_params)); worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; - worker_params.thread_mode = UCS_THREAD_MODE_SERIALIZED; + worker_params.thread_mode = UCS_THREAD_MODE_MULTI; ucp_worker_h ucp_worker; status = ucp_worker_create(ucp_context->get(), &worker_params, &ucp_worker); From 73f2ad9a23265a6ffe9e60e4fd235b51d37c173d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 5 Jul 2024 14:30:42 +0900 Subject: [PATCH 025/122] GH-43134: [C++] Upgrade bundled google-cloud-cpp to 2.22.0 (#43136) ### Rationale for this change `google_cloud_cpp_mocks` depends on `GTest::gmock_main` but it's built without `BUILD_TESTING`. google-cloud-cpp finds GoogleTest only with `BUILD_TESTING`. ### What changes are included in this PR? The recent google-cloud-cpp doesn't build `google_cloud_cpp_mocks` without `BUILD_TESTING`. Note that we can't use 2.23.0 or later because they can't be built with MinGW-w64. See also: * https://github.com/mingw-w64/mingw-w64/issues/49 * https://github.com/googleapis/google-cloud-cpp/issues/14436 ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #43134 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/scripts/PKGBUILD | 6 ++++++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 7 ++++++- cpp/thirdparty/versions.txt | 4 ++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index f6bbc78be710e..e12099f2b405d 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -69,6 +69,12 @@ build() { mkdir -p ${cpp_build_dir} pushd ${cpp_build_dir} + # We use static cURL in google-cloud-cpp. If we can use cURL's CMake + # package, we don't need to specify CURL_STATICLIB explicitly. But + # we don't have cURL's CMake package. We need to use CXXFLAGS + # instead of ARROW_CXXFLAGS because ARROW_CXXFLAGS aren't passed to + # ExternProjects. + export CXXFLAGS="${CXXFLAGS} -DCURL_STATICLIB" # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't # set the appropriate compiler definition. export CPPFLAGS="-DUTF8PROC_STATIC" diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index fe859a0121ca6..171c85baa86c3 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4225,7 +4225,8 @@ macro(build_nlohmann_json) set(NLOHMANN_JSON_INCLUDE_DIR "${NLOHMANN_JSON_PREFIX}/include") set(NLOHMANN_JSON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=" - -DJSON_BuildTests=OFF) + # google-cloud-cpp requires JSON_MultipleHeaders=ON + -DJSON_BuildTests=OFF -DJSON_MultipleHeaders=ON) set(NLOHMANN_JSON_BUILD_BYPRODUCTS ${NLOHMANN_JSON_PREFIX}/include/nlohmann/json.hpp) @@ -4294,6 +4295,7 @@ macro(build_google_cloud_cpp_storage) # We need this to build with OpenSSL 3.0. 
# See also: https://github.com/googleapis/google-cloud-cpp/issues/8544 -DGOOGLE_CLOUD_CPP_ENABLE_WERROR=OFF + -DGOOGLE_CLOUD_CPP_WITH_MOCKS=OFF -DOPENSSL_CRYPTO_LIBRARY=${OPENSSL_CRYPTO_LIBRARY} -DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR} -DOPENSSL_SSL_LIBRARY=${OPENSSL_SSL_LIBRARY}) @@ -4380,6 +4382,9 @@ macro(build_google_cloud_cpp_storage) nlohmann_json::nlohmann_json OpenSSL::SSL OpenSSL::Crypto) + if(WIN32) + target_link_libraries(google-cloud-cpp::rest-internal INTERFACE ws2_32) + endif() add_library(google-cloud-cpp::storage STATIC IMPORTED) set_target_properties(google-cloud-cpp::storage diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index f080cf58bca85..986ac056b61a6 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -72,8 +72,8 @@ ARROW_GFLAGS_BUILD_VERSION=v2.2.2 ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf ARROW_GLOG_BUILD_VERSION=v0.5.0 ARROW_GLOG_BUILD_SHA256_CHECKSUM=eede71f28371bf39aa69b45de23b329d37214016e2055269b3b5e7cfd40b59f5 -ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.12.0 -ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=8cda870803925c62de8716a765e03eb9d34249977e5cdb7d0d20367e997a55e2 +ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 +ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 ARROW_GTEST_BUILD_VERSION=1.11.0 From 5b5c164a6a467af2803e927b2de1b9b6ee5de895 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Fri, 5 Jul 2024 16:59:58 +0800 Subject: [PATCH 026/122] GH-43116: [C++][Compute] Mark KeyCompare.CompareColumnsToRowsLarge as large memory test (#43128) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change This test consumes more than 4GB memory and causes oom-kill when running with TSAN as reported in #43116 . ### What changes are included in this PR? Limit its running by marking it as large memory test. ### Are these changes tested? Change is test. ### Are there any user-facing changes? None. * GitHub Issue: #43116 Authored-by: Ruoxi Sun Signed-off-by: Raúl Cumplido --- cpp/src/arrow/compute/row/compare_test.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc index 662862075c245..22af7e067d855 100644 --- a/cpp/src/arrow/compute/row/compare_test.cc +++ b/cpp/src/arrow/compute/row/compare_test.cc @@ -166,10 +166,9 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } -#ifndef ARROW_VALGRIND // Compare columns to rows at offsets over 2GB within a row table. // Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. 
-TEST(KeyCompare, CompareColumnsToRowsLarge) { +TEST(KeyCompare, LARGE_MEMORY_TEST(CompareColumnsToRowsLarge)) { if constexpr (sizeof(void*) == 4) { GTEST_SKIP() << "Test only works on 64-bit platforms"; } @@ -300,7 +299,6 @@ TEST(KeyCompare, CompareColumnsToRowsLarge) { num_rows); } } -#endif // ARROW_VALGRIND } // namespace compute } // namespace arrow From 2de8008748e2dde383047e47580c69b96fccb35c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 5 Jul 2024 22:17:25 +0100 Subject: [PATCH 027/122] GH-41910: [Python] Add support for Pyodide (#37822) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyarrow knows about ARROW_ENABLE_THREADING and doesn't use threads if they are not enabled in libarrow. Split from #37696 * GitHub Issue: #41910 Lead-authored-by: Joe Marshall Co-authored-by: Joris Van den Bossche Co-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .pre-commit-config.yaml | 1 + ci/docker/conda-python-emscripten.dockerfile | 64 ++++ ci/scripts/cpp_build.sh | 6 +- ci/scripts/install_chromedriver.sh | 44 +++ ci/scripts/install_emscripten.sh | 36 ++ ci/scripts/python_build_emscripten.sh | 40 +++ ci/scripts/python_test_emscripten.sh | 38 ++ cpp/CMakePresets.json | 2 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + dev/tasks/tasks.yml | 9 + docker-compose.yml | 33 ++ python/CMakeLists.txt | 106 ++++-- python/pyarrow/_dataset_parquet.pyx | 6 +- python/pyarrow/_parquet.pyx | 10 +- python/pyarrow/conftest.py | 40 ++- python/pyarrow/error.pxi | 5 +- python/pyarrow/includes/libarrow_python.pxd | 3 + python/pyarrow/io.pxi | 81 +++++ python/pyarrow/lib.pyx | 11 + python/pyarrow/pandas_compat.py | 5 +- python/pyarrow/src/arrow/python/helpers.cc | 9 + python/pyarrow/src/arrow/python/helpers.h | 3 + python/pyarrow/tests/test_array.py | 1 + python/pyarrow/tests/test_cffi.py | 1 + python/pyarrow/tests/test_compute.py | 24 +- python/pyarrow/tests/test_convert_builtin.py | 6 +- python/pyarrow/tests/test_csv.py | 3 + python/pyarrow/tests/test_dataset.py | 17 +- python/pyarrow/tests/test_flight.py | 3 + python/pyarrow/tests/test_fs.py | 2 + python/pyarrow/tests/test_io.py | 7 +- python/pyarrow/tests/test_ipc.py | 3 + python/pyarrow/tests/test_jvm.py | 2 + python/pyarrow/tests/test_memory.py | 1 + python/pyarrow/tests/test_misc.py | 2 + python/pyarrow/tests/test_pandas.py | 4 + python/pyarrow/tests/test_scalars.py | 8 +- python/pyarrow/tests/test_schema.py | 1 + python/pyarrow/tests/test_tensor.py | 4 + python/pyarrow/tests/test_types.py | 2 + python/scripts/run_emscripten_tests.py | 343 +++++++++++++++++++ python/setup.py | 18 +- 42 files changed, 940 insertions(+), 65 deletions(-) create mode 100644 ci/docker/conda-python-emscripten.dockerfile create mode 100755 ci/scripts/install_chromedriver.sh create mode 100755 ci/scripts/install_emscripten.sh create mode 100755 ci/scripts/python_build_emscripten.sh create mode 100755 ci/scripts/python_test_emscripten.sh create mode 100644 python/scripts/run_emscripten_tests.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9bdd4f487bdec..bf0bcde14622a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,6 +38,7 @@ repos: # files: (/Dockerfile|\.dockerfile)$ files: >- ( + ?^ci/docker/conda-python-emscripten\.dockerfile$| ?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$| ) types: [] diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile new file mode 100644 index 
0000000000000..8ad705c920ba8 --- /dev/null +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG repo +ARG arch +ARG python="3.12" +FROM ${repo}:${arch}-conda-python-${python} + +ARG selenium_version="4.15.2" +ARG pyodide_version="0.26.0" +ARG chrome_version="latest" +ARG required_python_min="(3,12)" +# fail if python version < 3.12 +RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" + +# install selenium and pyodide-build and recent python + +# needs to be a login shell so ~/.profile is read +SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"] + +RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \ + python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version} + +# install pyodide dist directory to /pyodide +RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \ + wget -q "${pyodide_dist_url}" -O- | tar -xj -C / + +# install correct version of emscripten for this pyodide +COPY ci/scripts/install_emscripten.sh /arrow/ci/scripts/ +RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide + +# make sure zlib is cached in the EMSDK folder +RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib + +# install node 20 (needed for async call support) +# and pthread-stubs for build, and unzip needed for chrome build to work +RUN conda install nodejs=20 unzip pthread-stubs make -c conda-forge + +# install chrome for testing browser based runner +COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_chromedriver.sh "${chrome_version}" + +# make the version of make that is installed by conda be available everywhere +# or else pyodide's isolated build fails to find it +RUN ln -s "$(type -P make)" /bin/make + +ENV ARROW_BUILD_TESTS="OFF" \ + ARROW_BUILD_TYPE="release" \ + ARROW_DEPENDENCY_SOURCE="BUNDLED" \ + ARROW_EMSCRIPTEN="ON" diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 3ee7fbd9d19cd..bc2bba915f73a 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -30,7 +30,7 @@ if [ -x "$(command -v git)" ]; then fi # TODO(kszucs): consider to move these to CMake -if [ ! -z "${CONDA_PREFIX}" ]; then +if [ ! -z "${CONDA_PREFIX}" ] && [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ]; then echo -e "===\n=== Conda environment for build\n===" conda list @@ -99,6 +99,10 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then fi n_jobs=2 # Emscripten build fails on docker unless this is set really low source ~/emsdk/emsdk_env.sh + export CMAKE_INSTALL_PREFIX=$(em-config CACHE)/sysroot + # conda sets LDFLAGS / CFLAGS etc. 
which break + # emcmake so we unset them + unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS emcmake cmake \ --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ diff --git a/ci/scripts/install_chromedriver.sh b/ci/scripts/install_chromedriver.sh new file mode 100755 index 0000000000000..9097a20bfc5c9 --- /dev/null +++ b/ci/scripts/install_chromedriver.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Install Chrome and Chromedriver for Selenium + +set -e + +chrome_version=$1 + +if [ $chrome_version = "latest" ]; then + latest_release_path=LATEST_RELEASE_STABLE +else + latest_release_path=LATEST_RELEASE_${chrome_version} +fi +CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}") +CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb" +CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip" +wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}" +apt-get update +apt install -qqy /tmp/google-chrome.deb +rm -f /tmp/google-chrome.deb +rm -rf /var/lib/apt/lists/* +wget --no-verbose -O /tmp/chromedriver-linux64.zip "${CHROMEDRIVER_DOWNLOAD_URL}" +unzip /tmp/chromedriver-linux64.zip -d /opt/ +rm /tmp/chromedriver-linux64.zip +ln -fs /opt/chromedriver-linux64/chromedriver /usr/local/bin/chromedriver +echo "Using Chrome version: $(google-chrome --version)" +echo "Using Chrome Driver version: $(chromedriver --version)" diff --git a/ci/scripts/install_emscripten.sh b/ci/scripts/install_emscripten.sh new file mode 100755 index 0000000000000..4bad7238a6cdd --- /dev/null +++ b/ci/scripts/install_emscripten.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# install emscripten sdk version to match pyodide in $2 to directory $1/emsdk + +set -e + +target_path=$1 +pyodide_path=$2 + +emscripten_version=$(${pyodide_path}/python -c "import sys;print(*sys._emscripten_info.emscripten_version,sep='.')") + +cd ${target_path} +if [ ! -d emsdk ]; then + git clone https://github.com/emscripten-core/emsdk.git +fi +cd emsdk +./emsdk install ${emscripten_version} +./emsdk activate ${emscripten_version} +echo "Installed emsdk to: ${target_path}" \ No newline at end of file diff --git a/ci/scripts/python_build_emscripten.sh b/ci/scripts/python_build_emscripten.sh new file mode 100755 index 0000000000000..14e9626202079 --- /dev/null +++ b/ci/scripts/python_build_emscripten.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +arrow_dir=${1} +build_dir=${2} + + +source ~/emsdk/emsdk_env.sh + +source_dir=${arrow_dir}/python +python_build_dir=${build_dir}/python + +rm -rf ${python_build_dir} +cp -aL ${source_dir} ${python_build_dir} + +# conda sets LDFLAGS / CFLAGS etc. which break +# emcmake so we unset them +unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS + +pushd ${python_build_dir} +pyodide build +popd diff --git a/ci/scripts/python_test_emscripten.sh b/ci/scripts/python_test_emscripten.sh new file mode 100755 index 0000000000000..4029722568b9b --- /dev/null +++ b/ci/scripts/python_test_emscripten.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# run tests against Chrome and node.js as representative +# WebAssembly platforms (i.e. one browser, one non-browser). 
+ +set -ex + +build_dir=${1}/python +pyodide_dist_dir=${2} + +cd ${build_dir} + +# note: this uses the newest wheel in dist +pyodide_wheel=$(ls -t dist/pyarrow*.whl | head -1) + +echo "-------------- Running emscripten tests in Node ----------------------" +python scripts/run_emscripten_tests.py ${pyodide_wheel} --dist-dir=${pyodide_dist_dir} --runtime=node + +echo "-------------- Running emscripten tests in Chrome --------------------" +python scripts/run_emscripten_tests.py ${pyodide_wheel} --dist-dir=${pyodide_dist_dir} --runtime=chrome + diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index cb4cdfc03ac82..8886db0e11017 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -53,6 +53,7 @@ "ARROW_ACERO": "ON", "ARROW_BUILD_SHARED": "OFF", "ARROW_BUILD_STATIC": "ON", + "ARROW_CSV": "ON", "ARROW_CUDA": "OFF", "ARROW_DEPENDENCY_SOURCE": "BUNDLED", "ARROW_DEPENDENCY_USE_SHARED": "OFF", @@ -60,6 +61,7 @@ "ARROW_FLIGHT": "OFF", "ARROW_IPC": "ON", "ARROW_JEMALLOC": "OFF", + "ARROW_JSON": "ON", "ARROW_MIMALLOC": "OFF", "ARROW_ORC": "ON", "ARROW_RUNTIME_SIMD_LEVEL": "NONE", diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 171c85baa86c3..7dab0a362ff24 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4551,6 +4551,7 @@ macro(build_orc) BUILD_BYPRODUCTS ${ORC_STATIC_LIB} CMAKE_ARGS ${ORC_CMAKE_ARGS} DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_PROTOBUF_PROTOC} ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET} LZ4::lz4 diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 61df283960ccf..7a86fd3e3e75f 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1194,6 +1194,15 @@ tasks: image: conda-python {% endfor %} + test-conda-python-emscripten: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 22.04 + PYTHON: 3.12 + image: conda-python-emscripten + test-conda-python-3.11-hypothesis: ci: github template: docker-tests/github.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index a2a2b41c8747f..fa248d59037d3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -121,6 +121,7 @@ x-hierarchy: - conda-python-docs - conda-python-cython2 - conda-python-dask + - conda-python-emscripten - conda-python-hdfs - conda-python-java-integration - conda-python-jpype @@ -875,6 +876,38 @@ services: /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/python_test.sh /arrow"] + conda-python-emscripten: + # Usage: + # docker-compose build conda-python-emscripten + # docker-compose run --rm conda-python-emscripten + # Parameters: + # ARCH: amd64, arm64v8, ... + # UBUNTU: 22.04 + image: ${REPO}:${ARCH}-conda-python-emscripten + build: + context: . 
+ dockerfile: ci/docker/conda-python-emscripten.dockerfile + cache_from: + - ${REPO}:${ARCH}-conda-python-${PYTHON} + args: + repo: ${REPO} + arch: ${ARCH} + clang_tools: ${CLANG_TOOLS} + llvm: ${LLVM} + pyodide_version: "0.26.0" + chrome_version: "122" + selenium_version: "4.15.2" + required_python_min: "(3,12)" + python: ${PYTHON} + shm_size: *shm-size + volumes: *ubuntu-volumes + environment: + <<: [*common, *ccache, *sccache, *cpp] + command: [" + /arrow/ci/scripts/cpp_build.sh /arrow /build && + /arrow/ci/scripts/python_build_emscripten.sh /arrow /build && + /arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"] + ubuntu-cuda-python: # Usage: # docker-compose build cuda-cpp diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a8bbed117163d..980a63133c83c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -143,6 +143,21 @@ if(NOT DEFINED ARROW_RUNTIME_SIMD_LEVEL) endif() include(SetupCxxFlags) +if($ENV{PYODIDE}) + # These variables are needed for building PyArrow on Emscripten. + # If they aren't set, CMake cross compiling fails for Python + # modules (at least under Pyodide it does). + set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) + set(Python3_LIBRARY $ENV{CPYTHONLIB}) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + set(Python3_EXECUTABLE) + set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) + # we set the c and cxx compiler manually to bypass pywasmcross + # which is pyodide's way of messing with C++ build parameters. + set(CMAKE_C_COMPILER emcc) + set(CMAKE_CXX_COMPILER em++) +endif() + # Add common flags set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") @@ -344,6 +359,27 @@ set(PYARROW_CPP_LINK_LIBS "") # # Check all the options from Arrow and PyArrow C++ to be in line +# +# Order is important for "NOT ARROW_BUILD_SHARED". We must use +# depending modules -> depended modules order. For example, +# ArrowSubstrait depends on ArrowDataset. So PYARROW_CPP_LINK_LIBS +# must use +# "ArrowSubstrait::arrow_substrait_static;ArrowDataset::arrow_dataset_static" +# order. + +if(PYARROW_BUILD_SUBSTRAIT) + message(STATUS "Building PyArrow with Substrait") + if(NOT ARROW_SUBSTRAIT) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") + endif() + find_package(ArrowSubstrait REQUIRED) + if(ARROW_BUILD_SHARED) + list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_shared) + else() + list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_static) + endif() +endif() + if(PYARROW_BUILD_DATASET) message(STATUS "Building PyArrow with Dataset") if(NOT ARROW_DATASET) @@ -414,7 +450,17 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.16) target_precompile_headers(arrow_python PUBLIC "$<$:arrow/python/pch.h>") endif() -target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS} Python3::NumPy) + +# on static builds we need to be careful not to link PYARROW_CPP_LINK_LIBS +# into everything depending on arrow_python, or else we get duplicate +# libraries. Whereas conversely on shared builds, we need everything +# to depend on everything, as python loads modules separately +if(ARROW_BUILD_SHARED) + target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS}) +else() + target_link_libraries(arrow_python PRIVATE ${PYARROW_CPP_LINK_LIBS}) +endif() +target_link_libraries(arrow_python PUBLIC Python3::NumPy) target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING) install(TARGETS arrow_python ARCHIVE DESTINATION . 
@@ -650,27 +696,37 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + endif() endif() - endif() - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) + set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) + else() + # Acero is statically linked into libarrow_python already + set(ACERO_LINK_LIBS) + endif() list(APPEND CYTHON_EXTENSIONS _acero) endif() # Dataset if(PYARROW_BUILD_DATASET) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + endif() endif() - endif() - set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + else() + # dataset is statically linked into libarrow_python already + set(DATASET_LINK_LIBS) + endif() list(APPEND CYTHON_EXTENSIONS _dataset) endif() @@ -692,7 +748,9 @@ if(PYARROW_BUILD_PARQUET) endif() set(PARQUET_LINK_LIBS Parquet::parquet_shared) else() - set(PARQUET_LINK_LIBS Parquet::parquet_static) + # don't link the static lib as it is + # already in arrow_python + set(PARQUET_LINK_LIBS) endif() list(APPEND CYTHON_EXTENSIONS _parquet) if(PYARROW_BUILD_PARQUET_ENCRYPTION) @@ -741,18 +799,20 @@ endif() # Substrait if(PYARROW_BUILD_SUBSTRAIT) message(STATUS "Building PyArrow with Substrait") - if(NOT ARROW_SUBSTRAIT) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") - endif() - find_package(ArrowSubstrait REQUIRED) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB}) + + if(ARROW_BUILD_SHARED) + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB}) + endif() endif() + set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared) + else() + # Arrow Substrait is statically linked into libarrow_python already + set(SUBSTRAIT_LINK_LIBS) endif() - set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared) list(APPEND CYTHON_EXTENSIONS _substrait) endif() diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index a7afd065b592e..8fe9f30d33af9 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -27,7 +27,7 @@ import warnings import pyarrow as pa from pyarrow.lib cimport * -from pyarrow.lib import frombytes, tobytes +from pyarrow.lib import frombytes, tobytes, is_threading_enabled from pyarrow.includes.libarrow cimport * from pyarrow.includes.libarrow_dataset cimport * from pyarrow.includes.libarrow_dataset_parquet cimport * @@ -739,6 +739,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): new CParquetFragmentScanOptions())) self.use_buffered_stream = use_buffered_stream self.buffer_size = buffer_size + if pre_buffer and not is_threading_enabled(): + pre_buffer = False 
self.pre_buffer = pre_buffer if cache_options is not None: self.cache_options = cache_options @@ -789,6 +791,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): @pre_buffer.setter def pre_buffer(self, bint pre_buffer): + if pre_buffer and not is_threading_enabled(): + return self.arrow_reader_properties().set_pre_buffer(pre_buffer) @property diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 6c5b0af826b4e..41b15b633d3d2 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -42,7 +42,7 @@ from pyarrow.lib cimport (_Weakrefable, Buffer, Schema, from pyarrow.lib import (ArrowException, NativeFile, BufferOutputStream, _stringify_path, - tobytes, frombytes) + tobytes, frombytes, is_threading_enabled) cimport cpython as cp @@ -1453,6 +1453,9 @@ cdef class ParquetReader(_Weakrefable): default_arrow_reader_properties()) FileReaderBuilder builder + if pre_buffer and not is_threading_enabled(): + pre_buffer = False + if metadata is not None: c_metadata = metadata.sp_metadata @@ -1555,7 +1558,10 @@ cdef class ParquetReader(_Weakrefable): ---------- use_threads : bool """ - self.reader.get().set_use_threads(use_threads) + if is_threading_enabled(): + self.reader.get().set_use_threads(use_threads) + else: + self.reader.get().set_use_threads(False) def set_batch_size(self, int64_t batch_size): """ diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 2ac8427de17e7..29c850c142da1 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -16,9 +16,14 @@ # under the License. import pytest + +import os import pyarrow as pa from pyarrow import Codec from pyarrow import fs +from pyarrow.lib import is_threading_enabled +from pyarrow.tests.util import windows_has_tzdata +import sys import numpy as np @@ -31,6 +36,7 @@ 'dataset', 'hypothesis', 'fastparquet', + 'flight', 'gandiva', 'gcs', 'gdb', @@ -44,12 +50,15 @@ 'pandas', 'parquet', 'parquet_encryption', + 'processes', + 'requires_testing_data', 's3', + 'slow', 'snappy', + 'sockets', 'substrait', - 'flight', - 'slow', - 'requires_testing_data', + 'threading', + 'timezone_data', 'zstd', ] @@ -76,14 +85,31 @@ 'pandas': False, 'parquet': False, 'parquet_encryption': False, + 'processes': True, 'requires_testing_data': True, 's3': False, 'slow': False, 'snappy': Codec.is_available('snappy'), + 'sockets': True, 'substrait': False, + 'threading': is_threading_enabled(), + 'timezone_data': True, 'zstd': Codec.is_available('zstd'), } +if sys.platform == "emscripten": + # Emscripten doesn't support subprocess, + # multiprocessing, gdb or socket based + # networking + defaults['gdb'] = False + defaults['processes'] = False + defaults['sockets'] = False + +if sys.platform == "win32": + defaults['timezone_data'] = windows_has_tzdata() +elif sys.platform == "emscripten": + defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo") + try: import cython # noqa defaults['cython'] = True @@ -116,7 +142,13 @@ try: import pyarrow.orc # noqa - defaults['orc'] = True + if sys.platform == "win32": + defaults['orc'] = True + else: + # orc tests on non-Windows platforms only work + # if timezone data exists, so skip them if + # not. 
+ defaults['orc'] = defaults['timezone_data'] except ImportError: pass diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi index 4357cde32c31d..cbe25522e8d7e 100644 --- a/python/pyarrow/error.pxi +++ b/python/pyarrow/error.pxi @@ -26,6 +26,7 @@ import os import signal import threading +from pyarrow.lib import is_threading_enabled from pyarrow.util import _break_traceback_cycle_from_frame @@ -217,7 +218,9 @@ cdef class SignalStopHandler: maybe_source.status().Warn() else: self._stop_token.init(deref(maybe_source).token()) - self._enabled = True + # signals don't work on Emscripten without threads. + # and possibly other single-thread environments. + self._enabled = is_threading_enabled() def _init_signals(self): if (signal_handlers_enabled and diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 136d6bc8b14cd..9fcc97aaf0a9c 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -317,3 +317,6 @@ cdef extern from "arrow/python/benchmark.h" namespace "arrow::py::benchmark": cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil: void GdbTestSession "arrow::gdb::TestSession"() + +cdef extern from "arrow/python/helpers.h" namespace "arrow::py::internal": + c_bool IsThreadingEnabled() diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 3a0125e957244..1d942e8ccabc6 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -659,6 +659,9 @@ cdef class NativeFile(_Weakrefable): int64_t bytes_read = 0 uint8_t* buf + if not is_threading_enabled(): + return self._download_nothreads(stream_or_path, buffer_size) + handle = self.get_input_stream() buffer_size = buffer_size or DEFAULT_BUFFER_SIZE @@ -738,6 +741,63 @@ cdef class NativeFile(_Weakrefable): if exc_info is not None: raise exc_info[0], exc_info[1], exc_info[2] + def _download_nothreads(self, stream_or_path, buffer_size=None): + """ + Internal method to do a download without separate threads, queues etc. + Called by download above if is_threading_enabled() == False + """ + cdef: + int64_t bytes_read = 0 + uint8_t* buf + + handle = self.get_input_stream() + + buffer_size = buffer_size or DEFAULT_BUFFER_SIZE + + if not hasattr(stream_or_path, 'read'): + stream = open(stream_or_path, 'wb') + + def cleanup(): + stream.close() + else: + stream = stream_or_path + + def cleanup(): + pass + + self.seek(0) + + # This isn't ideal -- PyBytes_FromStringAndSize copies the data from + # the passed buffer, so it's hard for us to avoid doubling the memory + buf = malloc(buffer_size) + if buf == NULL: + raise MemoryError("Failed to allocate {0} bytes" + .format(buffer_size)) + + cdef int64_t total_bytes = 0 + cdef int32_t c_buffer_size = buffer_size + + try: + while True: + with nogil: + bytes_read = GetResultValue( + handle.get().Read(c_buffer_size, buf)) + + total_bytes += bytes_read + + # EOF + if bytes_read == 0: + break + + pybuf = cp.PyBytes_FromStringAndSize(buf, + bytes_read) + + # no background thread - write on main thread + stream.write(pybuf) + finally: + free(buf) + cleanup() + def upload(self, stream, buffer_size=None): """ Write from a source stream to this file. @@ -749,6 +809,9 @@ cdef class NativeFile(_Weakrefable): buffer_size : int, optional The buffer size to use for data transfers. 
""" + if not is_threading_enabled(): + return self._upload_nothreads(stream, buffer_size) + write_queue = Queue(50) self._assert_writable() @@ -793,6 +856,24 @@ cdef class NativeFile(_Weakrefable): if exc_info is not None: raise exc_info[0], exc_info[1], exc_info[2] + def _upload_nothreads(self, stream, buffer_size=None): + """ + Internal method to do an upload without separate threads, queues etc. + Called by upload above if is_threading_enabled() == False + """ + self._assert_writable() + + buffer_size = buffer_size or DEFAULT_BUFFER_SIZE + + while True: + buf = stream.read(buffer_size) + if not buf: + break + + # no threading - just write + self.write(buf) + + BufferedIOBase.register(NativeFile) # ---------------------------------------------------------------------- diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index e08021c62b5ae..c72841c299566 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -79,6 +79,17 @@ def set_cpu_count(int count): check_status(SetCpuThreadPoolCapacity(count)) +def is_threading_enabled() -> bool: + """ + Returns True if threading is enabled in libarrow. + + If it isn't enabled, then python shouldn't create any + threads either, because we're probably on a system where + threading doesn't work (e.g. Emscripten). + """ + return libarrow_python.IsThreadingEnabled() + + Type_NA = _Type_NA Type_BOOL = _Type_BOOL Type_UINT8 = _Type_UINT8 diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index c23c64d532b66..fcccf564fc619 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -33,7 +33,7 @@ import numpy as np import pyarrow as pa -from pyarrow.lib import _pandas_api, frombytes # noqa +from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa _logical_type_map = {} @@ -581,6 +581,9 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1, columns=None, nthreads = pa.cpu_count() else: nthreads = 1 + # if we don't have threading in libarrow, don't use threading here either + if not is_threading_enabled(): + nthreads = 1 def convert_column(col, field): if field is None: diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc index 2c86c86a919be..18302e6fe0401 100644 --- a/python/pyarrow/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/arrow/python/helpers.cc @@ -29,6 +29,7 @@ #include "arrow/python/decimal.h" #include "arrow/type_fwd.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "arrow/util/logging.h" namespace arrow { @@ -467,6 +468,14 @@ void DebugPrint(PyObject* obj) { PySys_WriteStderr("%s\n", repr.c_str()); } +bool IsThreadingEnabled() { +#ifdef ARROW_ENABLE_THREADING + return true; +#else + return false; +#endif +} + } // namespace internal } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/arrow/python/helpers.h b/python/pyarrow/src/arrow/python/helpers.h index a8e5f80b60678..e2fd8212ae68d 100644 --- a/python/pyarrow/src/arrow/python/helpers.h +++ b/python/pyarrow/src/arrow/python/helpers.h @@ -154,6 +154,9 @@ Status IntegerScalarToFloat32Safe(PyObject* obj, float* result); // \brief Print Python object __repr__ void DebugPrint(PyObject* obj); +ARROW_PYTHON_EXPORT +bool IsThreadingEnabled(); + } // namespace internal } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 78d06b26e3622..30d258b9aabd8 100644 --- a/python/pyarrow/tests/test_array.py +++ 
b/python/pyarrow/tests/test_array.py @@ -34,6 +34,7 @@ from pyarrow.vendored.version import Version +@pytest.mark.processes def test_total_bytes_allocated(): code = """if 1: import pyarrow as pa diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 70841eeb0619a..e994a09f92ed2 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -749,6 +749,7 @@ def test_roundtrip_chunked_array_capsule_requested_schema(): chunked.__arrow_c_stream__(requested_capsule) +@needs_cffi def test_import_device_no_cuda(): try: import pyarrow.cuda # noqa diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index e7d7b9769740f..13e30ed1da493 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -38,7 +38,6 @@ import pyarrow as pa import pyarrow.compute as pc from pyarrow.lib import ArrowNotImplementedError -from pyarrow.tests import util try: import pyarrow.substrait as pas @@ -137,7 +136,7 @@ def test_exported_option_classes(): @pytest.mark.filterwarnings( "ignore:pyarrow.CumulativeSumOptions is deprecated as of 14.0" ) -def test_option_class_equality(): +def test_option_class_equality(request): options = [ pc.ArraySortOptions(), pc.AssumeTimezoneOptions("UTC"), @@ -193,17 +192,17 @@ def test_option_class_equality(): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), ] - # Timezone database might not be installed on Windows - if sys.platform != "win32" or util.windows_has_tzdata(): + # Timezone database might not be installed on Windows or Emscripten + if request.config.pyarrow.is_enabled["timezone_data"]: options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana")) classes = {type(option) for option in options} for cls in exported_option_classes: - # Timezone database might not be installed on Windows + # Timezone database might not be installed on Windows or Emscripten if ( cls not in classes - and (sys.platform != "win32" or util.windows_has_tzdata()) + and (request.config.pyarrow.is_enabled["timezone_data"]) and cls != pc.AssumeTimezoneOptions ): try: @@ -2085,8 +2084,7 @@ def test_strptime(): @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data def test_strftime(): times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] @@ -2245,7 +2243,7 @@ def _check_datetime_components(timestamps, timezone=None): @pytest.mark.pandas -def test_extract_datetime_components(): +def test_extract_datetime_components(request): timestamps = ["1970-01-01T00:00:59.123456789", "2000-02-29T23:23:23.999999999", "2033-05-18T03:33:20.000000000", @@ -2268,7 +2266,7 @@ def test_extract_datetime_components(): _check_datetime_components(timestamps) # Test timezone aware timestamp array - if sys.platform == "win32" and not util.windows_has_tzdata(): + if not request.config.pyarrow.is_enabled["timezone_data"]: pytest.skip('Timezone database is not installed on Windows') else: for timezone in timezones: @@ -2289,8 +2287,7 @@ def test_iso_calendar_longer_array(unit): @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data def test_assume_timezone(): ts_type = pa.timestamp("ns") timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789", @@ -2485,8 
+2482,7 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) @pytest.mark.pandas diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index b824b89564374..6140163a8ee8c 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -21,7 +21,6 @@ import itertools import math import re -import sys import hypothesis as h import numpy as np @@ -29,7 +28,6 @@ from pyarrow.pandas_compat import _pandas_api # noqa import pyarrow as pa -from pyarrow.tests import util import pyarrow.tests.strategies as past @@ -1161,6 +1159,7 @@ def test_sequence_timestamp_with_timezone_inference(): assert arr.type == expected_type +@pytest.mark.timezone_data def test_sequence_timestamp_with_zoneinfo_timezone_inference(): pytest.importorskip("zoneinfo") import zoneinfo @@ -1354,8 +1353,7 @@ def test_sequence_timestamp_nanoseconds(): @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data def test_sequence_timestamp_from_int_with_unit(): # TODO(wesm): This test might be rewritten to assert the actual behavior # when pandas is not installed diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 9ddb5197e9120..112129d9602ed 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1406,6 +1406,7 @@ def test_stress_convert_options_blowup(self): assert table.num_rows == 0 assert table.column_names == col_names + @pytest.mark.threading def test_cancellation(self): if (threading.current_thread().ident != threading.main_thread().ident): @@ -1475,6 +1476,7 @@ def signal_from_thread(): assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum + @pytest.mark.threading def test_cancellation_disabled(self): # ARROW-12622: reader would segfault when the cancelling signal # handler was not enabled (e.g. 
if disabled, or if not on the @@ -1825,6 +1827,7 @@ def use_threads(self): return False +@pytest.mark.threading class TestThreadedStreamingCSVRead(BaseStreamingCSVRead): @property def use_threads(self): diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 0b79218fb0018..3b0284bcb74a6 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -37,6 +37,7 @@ import pyarrow.feather import pyarrow.fs as fs import pyarrow.json +from pyarrow.lib import is_threading_enabled from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, _configure_s3_limited_user, _filesystem_uri, change_cwd) @@ -808,29 +809,34 @@ def test_parquet_scan_options(): assert opts1.use_buffered_stream is False assert opts1.buffer_size == 2**13 - assert opts1.pre_buffer is True + if is_threading_enabled(): # pre buffering requires threads + assert opts1.pre_buffer is True assert opts1.thrift_string_size_limit == 100_000_000 # default in C++ assert opts1.thrift_container_size_limit == 1_000_000 # default in C++ assert opts1.page_checksum_verification is False assert opts2.use_buffered_stream is False assert opts2.buffer_size == 2**12 - assert opts2.pre_buffer is True + if is_threading_enabled(): # pre buffering requires threads + assert opts2.pre_buffer is True assert opts3.use_buffered_stream is True assert opts3.buffer_size == 2**13 - assert opts3.pre_buffer is True + if is_threading_enabled(): # pre buffering requires threads + assert opts3.pre_buffer is True assert opts4.use_buffered_stream is False assert opts4.buffer_size == 2**13 - assert opts4.pre_buffer is False + if is_threading_enabled(): # pre buffering requires threads + assert opts4.pre_buffer is False assert opts5.thrift_string_size_limit == 123456 assert opts5.thrift_container_size_limit == 987654 assert opts6.page_checksum_verification is True - assert opts7.pre_buffer is True + if is_threading_enabled(): # pre buffering requires threads + assert opts7.pre_buffer is True assert opts7.cache_options == cache_opts assert opts7.cache_options != opts1.cache_options @@ -4106,6 +4112,7 @@ def test_write_dataset_with_scanner(tempdir): @pytest.mark.parquet +@pytest.mark.threading def test_write_dataset_with_backpressure(tempdir): consumer_gate = threading.Event() diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 9553dc2507225..4853dafc76c72 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -960,6 +960,7 @@ def test_server_exit_reraises_exception(): raise ValueError() +@pytest.mark.threading @pytest.mark.slow def test_client_wait_for_available(): location = ('localhost', find_free_port()) @@ -1603,6 +1604,7 @@ def test_cancel_do_get(): reader.read_chunk() +@pytest.mark.threading @pytest.mark.slow def test_cancel_do_get_threaded(): """Test canceling a DoGet operation from another thread.""" @@ -2067,6 +2069,7 @@ def do_exchange(self, context, descriptor, reader, writer): time.sleep(0.5) +@pytest.mark.threading def test_interrupt(): if threading.current_thread().ident != threading.main_thread().ident: pytest.skip("test only works from main Python thread") diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 412daa2bd9ea1..f8ce74700dea8 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1987,6 +1987,8 @@ def test_s3_finalize_region_resolver(): subprocess.check_call([sys.executable, "-c", code]) +@pytest.mark.processes 
+@pytest.mark.threading @pytest.mark.s3 def test_concurrent_s3fs_init(): # GH-39897: lazy concurrent initialization of S3 subsystem should not crash diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 2306014c4194a..ef499a3a8d76c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -37,7 +37,7 @@ def check_large_seeks(file_factory, expected_error=None): - if sys.platform in ('win32', 'darwin'): + if sys.platform in ('win32', 'darwin', 'emscripten'): pytest.skip("need sparse file support") try: filename = tempfile.mktemp(prefix='test_io') @@ -1143,6 +1143,8 @@ def _try_delete(path): def test_memory_map_writer(tmpdir): + if sys.platform == "emscripten": + pytest.xfail("Multiple memory maps to same file don't work on emscripten") SIZE = 4096 arr = np.random.randint(0, 256, size=SIZE).astype('u1') data = arr.tobytes()[:SIZE] @@ -1334,6 +1336,9 @@ def test_native_file_modes(tmpdir): assert f.seekable() +@pytest.mark.xfail( + sys.platform == "emscripten", reason="umask doesn't work on Emscripten" +) def test_native_file_permissions(tmpdir): # ARROW-10124: permissions of created files should follow umask cur_umask = os.umask(0o002) diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index d8eb6e926e4c0..1e5242efe40f0 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -242,6 +242,7 @@ def test_empty_stream(): @pytest.mark.pandas +@pytest.mark.processes def test_read_year_month_nano_interval(tmpdir): """ARROW-15783: Verify to_pandas works for interval types. @@ -895,6 +896,7 @@ def socket_fixture(): return SocketStreamFixture() +@pytest.mark.sockets def test_socket_simple_roundtrip(socket_fixture): socket_fixture.start_server(do_read_all=False) writer_batches = socket_fixture.write_batches() @@ -906,6 +908,7 @@ def test_socket_simple_roundtrip(socket_fixture): assert reader_batches[i].equals(batch) +@pytest.mark.sockets def test_socket_read_all(socket_fixture): socket_fixture.start_server(do_read_all=True) writer_batches = socket_fixture.write_batches() diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index c5996f9215343..e1bd0d82d9f16 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -26,6 +26,8 @@ jpype = pytest.importorskip("jpype") +pytestmark = pytest.mark.processes + @pytest.fixture(scope="session") def root_allocator(): diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py index 4f199952344f2..53c25f3b3ef20 100644 --- a/python/pyarrow/tests/test_memory.py +++ b/python/pyarrow/tests/test_memory.py @@ -26,6 +26,7 @@ import pytest +pytestmark = pytest.mark.processes possible_backends = ["system", "jemalloc", "mimalloc"] diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 3d8ab2999e603..c42e4fbdfc2e8 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -56,6 +56,7 @@ def test_io_thread_count(): pa.set_io_thread_count(n) +@pytest.mark.processes def test_env_var_io_thread_count(): # Test that the number of IO threads can be overridden with the # ARROW_IO_THREADS environment variable. 
@@ -117,6 +118,7 @@ def test_runtime_info(): subprocess.check_call([sys.executable, "-c", code], env=env) +@pytest.mark.processes def test_import_at_shutdown(): # GH-38626: importing PyArrow at interpreter shutdown would crash code = """if 1: diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 7d74a60dcb921..208812c3ac458 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2957,6 +2957,8 @@ def test_empty_arrays(self): def test_non_threaded_conversion(self): _non_threaded_conversion() + @pytest.mark.processes + @pytest.mark.threading def test_threaded_conversion_multiprocess(self): # Parallel conversion should work from child processes too (ARROW-2963) pool = mp.Pool(2) @@ -4824,6 +4826,7 @@ def test_timestamp_as_object_fixed_offset(): assert pa.table(result) == table +@pytest.mark.processes def test_threaded_pandas_import(): invoke_script("pandas_threaded_import.py") @@ -5127,6 +5130,7 @@ def roundtrip(df, schema=None): schema=schema) +@pytest.mark.processes def test_is_data_frame_race_condition(): # See https://github.com/apache/arrow/issues/39313 test_util.invoke_script('arrow_39313.py') diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 6a814111898b7..bc50697e1be17 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -18,14 +18,12 @@ import datetime import decimal import pytest -import sys import weakref import numpy as np import pyarrow as pa import pyarrow.compute as pc -from pyarrow.tests import util @pytest.mark.parametrize(['value', 'ty', 'klass'], [ @@ -157,8 +155,7 @@ def test_hashing_struct_scalar(): assert hash1 == hash2 -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data def test_timestamp_scalar(): a = repr(pa.scalar("0000-01-01").cast(pa.timestamp("s"))) assert a == "" @@ -325,8 +322,7 @@ def test_cast(): pa.scalar('foo').cast('int32') -@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), - reason="Timezone database is not installed on Windows") +@pytest.mark.timezone_data def test_cast_timestamp_to_string(): # GH-35370 pytest.importorskip("pytz") diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 8793c9e773c1d..1b05c58384cf0 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -83,6 +83,7 @@ def test_type_to_pandas_dtype(): @pytest.mark.pandas +@pytest.mark.processes def test_type_to_pandas_dtype_check_import(): # ARROW-7980 test_util.invoke_script('arrow_7980.py') diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index 3e6a4ca8ed222..29c6de65b1607 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -188,6 +188,10 @@ def test_read_tensor(tmpdir): path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor') write_mmap = pa.create_memory_map(path, data_size) pa.ipc.write_tensor(tensor, write_mmap) + if sys.platform == 'emscripten': + # emscripten doesn't support multiple + # memory maps to same file + write_mmap.close() # Try to read tensor read_mmap = pa.memory_map(path, mode='r') array = pa.ipc.read_tensor(read_mmap).to_numpy() diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index f7b6040f510af..aecf32c5076be 100644 --- a/python/pyarrow/tests/test_types.py +++ 
b/python/pyarrow/tests/test_types.py @@ -345,6 +345,7 @@ def test_pytz_tzinfo_to_string(): assert [pa.lib.tzinfo_to_string(i) for i in tz] == expected +@pytest.mark.timezone_data def test_dateutil_tzinfo_to_string(): if sys.platform == 'win32': # Skip due to new release of python-dateutil @@ -360,6 +361,7 @@ def test_dateutil_tzinfo_to_string(): assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' +@pytest.mark.timezone_data def test_zoneinfo_tzinfo_to_string(): zoneinfo = pytest.importorskip('zoneinfo') if sys.platform == 'win32': diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py new file mode 100644 index 0000000000000..1a4b4a4e05614 --- /dev/null +++ b/python/scripts/run_emscripten_tests.py @@ -0,0 +1,343 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import argparse +import contextlib +import http.server +import os +import queue +import shutil +import subprocess +import sys +import time +import threading + +from pathlib import Path +from io import BytesIO + +from selenium import webdriver + + +class TemplateOverrider(http.server.SimpleHTTPRequestHandler): + def log_request(self, code="-", size="-"): + # don't log successful requests + return + + def do_GET(self) -> bytes | None: + if self.path.endswith(PYARROW_WHEEL_PATH.name): + self.send_response(200) + self.send_header("Content-type", "application/x-zip") + self.end_headers() + with PYARROW_WHEEL_PATH.open(mode="rb") as wheel: + self.copyfile(wheel, self.wfile) + if self.path.endswith("/test.html"): + body = b""" + + + + + + + + """ + self.send_response(200) + self.send_header("Content-type", "text/html") + self.send_header("Content-length", len(body)) + self.end_headers() + self.copyfile(BytesIO(body), self.wfile) + elif self.path.endswith("/worker.js"): + body = b""" + importScripts("./pyodide.js"); + onmessage = async function (e) { + const data = e.data; + if (!self.pyodide) { + self.pyodide = await loadPyodide(); + } + function do_print(arg) { + let databytes = Array.from(arg); + self.postMessage({print:databytes}); + return databytes.length; + } + self.pyodide.setStdout({write:do_print,isatty:data.isatty}); + self.pyodide.setStderr({write:do_print,isatty:data.isatty}); + + await self.pyodide.loadPackagesFromImports(data.python); + let results = await self.pyodide.runPythonAsync(data.python); + self.postMessage({results}); + } + """ + self.send_response(200) + self.send_header("Content-type", "application/javascript") + self.send_header("Content-length", len(body)) + self.end_headers() + self.copyfile(BytesIO(body), self.wfile) + + else: + return super().do_GET() + + def end_headers(self): + # Enable Cross-Origin Resource Sharing (CORS) + self.send_header("Access-Control-Allow-Origin", "*") + super().end_headers() + + +def 
run_server_thread(dist_dir, q): + global _SERVER_ADDRESS + os.chdir(dist_dir) + server = http.server.HTTPServer(("", 0), TemplateOverrider) + q.put(server.server_address) + print(f"Starting server for {dist_dir} at: {server.server_address}") + server.serve_forever() + + +@contextlib.contextmanager +def launch_server(dist_dir): + q = queue.Queue() + p = threading.Thread(target=run_server_thread, args=[dist_dir, q], daemon=True) + p.start() + address = q.get(timeout=50) + time.sleep(0.1) # wait to make sure server is started + yield address + p.terminate() + + +class NodeDriver: + import subprocess + + def __init__(self, hostname, port): + self.process = subprocess.Popen( + [shutil.which("script"), "-c", shutil.which("node")], + stdin=subprocess.PIPE, + shell=False, + bufsize=0, + ) + print(self.process) + time.sleep(0.1) # wait for node to start + self.hostname = hostname + self.port = port + self.last_ret_code = None + + def load_pyodide(self, dist_dir): + self.execute_js( + f""" + const {{ loadPyodide }} = require('{dist_dir}/pyodide.js'); + let pyodide = await loadPyodide(); + """ + ) + + def clear_logs(self): + pass # we don't handle logs for node + + def write_stdin(self, buffer): + # because we use unbuffered IO for + # stdout, stdin.write is also unbuffered + # so might under-run on writes + while len(buffer) > 0 and self.process.poll() is None: + written = self.process.stdin.write(buffer) + if written == len(buffer): + break + elif written == 0: + # full buffer - wait + time.sleep(0.01) + else: + buffer = buffer[written:] + + def execute_js(self, code, wait_for_terminate=True): + self.write_stdin((code + "\n").encode("utf-8")) + + def load_arrow(self): + self.execute_js(f"await pyodide.loadPackage('{PYARROW_WHEEL_PATH}');") + + def execute_python(self, code, wait_for_terminate=True): + js_code = f""" + python = `{code}`; + await pyodide.loadPackagesFromImports(python); + python_output = await pyodide.runPythonAsync(python); + """ + self.last_ret_code = self.execute_js(js_code, wait_for_terminate) + return self.last_ret_code + + def wait_for_done(self): + # in node we just let it run above + # then send EOF and join process + self.write_stdin(b"process.exit(python_output)\n") + return self.process.wait() + + +class BrowserDriver: + def __init__(self, hostname, port, driver): + self.driver = driver + self.driver.get(f"http://{hostname}:{port}/test.html") + self.driver.set_script_timeout(100) + + def load_pyodide(self, dist_dir): + pass + + def load_arrow(self): + self.execute_python( + f"import pyodide_js as pjs\n" + f"await pjs.loadPackage('{PYARROW_WHEEL_PATH.name}')\n" + ) + + def execute_python(self, code, wait_for_terminate=True): + if wait_for_terminate: + self.driver.execute_async_script( + f""" + let callback = arguments[arguments.length-1]; + python = `{code}`; + window.python_done_callback = callback; + window.pyworker.postMessage( + {{python, isatty: {'true' if sys.stdout.isatty() else 'false'}}}); + """ + ) + else: + self.driver.execute_script( + f""" + let python = `{code}`; + window.python_done_callback= (x) => {{window.python_script_done=x;}}; + window.pyworker.postMessage( + {{python,isatty:{'true' if sys.stdout.isatty() else 'false'}}}); + """ + ) + + def clear_logs(self): + self.driver.execute_script("window.python_logs = [];") + + def wait_for_done(self): + while True: + # poll for console.log messages from our webworker + # which are the output of pytest + lines = self.driver.execute_script( + "let temp = window.python_logs;window.python_logs=[];return temp;" + ) + 
if len(lines) > 0: + sys.stdout.buffer.write(bytes(lines)) + done = self.driver.execute_script("return window.python_script_done;") + if done is not None: + value = done["result"] + self.driver.execute_script("delete window.python_script_done;") + return value + time.sleep(0.1) + + +class ChromeDriver(BrowserDriver): + def __init__(self, hostname, port): + from selenium.webdriver.chrome.options import Options + + options = Options() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + super().__init__(hostname, port, webdriver.Chrome(options=options)) + + +class FirefoxDriver(BrowserDriver): + def __init__(self, hostname, port): + from selenium.webdriver.firefox.options import Options + + options = Options() + options.add_argument("--headless") + + super().__init__(hostname, port, webdriver.Firefox(options=options)) + + +def _load_pyarrow_in_runner(driver, wheel_name): + driver.load_arrow() + driver.execute_python( + """import sys +import micropip +if "pyarrow" not in sys.modules: + await micropip.install("hypothesis") + import pyodide_js as pjs + await pjs.loadPackage("numpy") + await pjs.loadPackage("pandas") + import pytest + import pandas # import pandas after pyarrow package load for pandas/pyarrow + # functions to work +import pyarrow + """, + wait_for_terminate=True, + ) + + +parser = argparse.ArgumentParser() +parser.add_argument( + "-d", + "--dist-dir", + type=str, + help="Pyodide distribution directory", + default="./pyodide", +) +parser.add_argument("wheel", type=str, help="Wheel to run tests from") +parser.add_argument( + "-t", "--test-submodule", help="Submodule that tests live in", default="test" +) +parser.add_argument( + "-r", + "--runtime", + type=str, + choices=["chrome", "node", "firefox"], + help="Runtime to run tests in", + default="chrome", +) +args = parser.parse_args() + +PYARROW_WHEEL_PATH = Path(args.wheel).resolve() + +dist_dir = Path(os.getcwd(), args.dist_dir).resolve() +print(f"dist dir={dist_dir}") +with launch_server(dist_dir) as (hostname, port): + if args.runtime == "chrome": + driver = ChromeDriver(hostname, port) + elif args.runtime == "node": + driver = NodeDriver(hostname, port) + elif args.runtime == "firefox": + driver = FirefoxDriver(hostname, port) + + print("Load pyodide in browser") + driver.load_pyodide(dist_dir) + print("Load pyarrow in browser") + _load_pyarrow_in_runner(driver, Path(args.wheel).name) + driver.clear_logs() + print("Run pytest in browser") + driver.execute_python( + """ +import pyarrow,pathlib +pyarrow_dir = pathlib.Path(pyarrow.__file__).parent +pytest.main([pyarrow_dir, '-v']) +""", + wait_for_terminate=False, + ) + print("Wait for done") + os._exit(driver.wait_for_done()) diff --git a/python/setup.py b/python/setup.py index b738b2f77290e..11cd7028023be 100755 --- a/python/setup.py +++ b/python/setup.py @@ -40,6 +40,14 @@ # Check if we're running 64-bit Python is_64_bit = sys.maxsize > 2**32 +# We can't use sys.platform in a cross-compiling situation +# as here it may be set to the host not target platform +is_emscripten = ( + sysconfig.get_config_var("SOABI") + and sysconfig.get_config_var("SOABI").find("emscripten") != -1 +) + + if Cython.__version__ < '0.29.31': raise Exception( 'Please update your Cython version. 
Supported Cython >= 0.29.31') @@ -298,8 +306,14 @@ def append_cmake_component(flag, varname): build_tool_args.append(f'-j{parallel}') # Generate the build files - print("-- Running cmake for PyArrow") - self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) + if is_emscripten: + print("-- Running emcmake cmake for PyArrow on Emscripten") + self.spawn(['emcmake', 'cmake'] + extra_cmake_args + + cmake_options + [source]) + else: + print("-- Running cmake for PyArrow") + self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) + print("-- Finished cmake for PyArrow") print("-- Running cmake --build for PyArrow") From ca7108dcd8c12bd29deb540f4e0c1e98fff4bcb4 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 6 Jul 2024 06:52:17 +0900 Subject: [PATCH 028/122] GH-43152: [Release] Require "digest/sha1" explicitly for thread safety (#43154) ### Rationale for this change If we don't require `digest/sha1` explicitly, Ruby requires it automatically the first time it's needed, and that implicit auto-require is not thread-safe. ### What changes are included in this PR? Require `digest/sha1` explicitly in the main thread to avoid the auto require. ### Are these changes tested? No. ### Are there any user-facing changes? No. * GitHub Issue: #43152 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/binary-task.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index c2386a1f52f21..db008b1551309 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -16,6 +16,7 @@ # under the License. require "cgi/util" +require "digest/sha1" require "digest/sha2" require "io/console" require "json" From 8d5b289b100e068b47739d8fee0efdead9f1c574 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 6 Jul 2024 11:35:57 +0900 Subject: [PATCH 029/122] GH-43158: [Packaging] Use bundled nlohmann/json on AlmaLinux 8/CentOS Stream 8 (#43159) ### Rationale for this change Because the json-devel packages on those distributions don't provide nlohmann/json_fwd.h, which is required by google-cloud-cpp. The upstream issue: https://github.com/googleapis/google-cloud-cpp/issues/14438 ### What changes are included in this PR? Use the bundled nlohmann/json instead. ### Are these changes tested? Yes. ### Are there any user-facing changes? No.
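As an illustration (not part of the patch itself): Arrow's C++ toolchain resolves each third-party dependency through a `<package>_SOURCE` cache variable, and setting it to `BUNDLED` (as the spec change below does with `-Dnlohmann_json_SOURCE=BUNDLED`) makes the build compile its own copy instead of using a system package. A minimal sketch as a CMake initial-cache file; the file name and the `ARROW_GCS` setting are assumptions made for this example:

```cmake
# bundled-json.cmake -- hypothetical initial-cache file, loaded with:
#   cmake -C bundled-json.cmake /path/to/arrow/cpp
# Enable GCS support, which pulls in google-cloud-cpp and thus nlohmann/json.
set(ARROW_GCS ON CACHE BOOL "build the GCS filesystem")
# Ignore the distro's json-devel package and build nlohmann/json from source,
# the same effect as passing -Dnlohmann_json_SOURCE=BUNDLED on the command line.
set(nlohmann_json_SOURCE BUNDLED CACHE STRING "force the bundled nlohmann/json")
```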
* GitHub Issue: #43158 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index d5e6c3a332eb3..8d47407c03522 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -63,6 +63,7 @@ %define gcc_package gcc %endif +%define use_bundled_nlohmann_json (%{_rhel} == 8) %define use_flight (%{_rhel} >= 8 || %{_amzn} >= 2023) %define use_gandiva (%{_rhel} >= 8 || %{_amzn} >= 2023) %define use_gcs (%{_rhel} >= 8) @@ -117,7 +118,7 @@ BuildRequires: glog-devel BuildRequires: grpc-devel BuildRequires: grpc-plugins %endif -%if %{use_gcs} +%if %{use_gcs} && !%{use_bundled_nlohmann_json} BuildRequires: json-devel %endif BuildRequires: libzstd-devel @@ -194,6 +195,9 @@ cd cpp -DCMAKE_BUILD_TYPE=$cpp_build_type \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ +%if %{use_bundled_nlohmann_json} + -Dnlohmann_json_SOURCE=BUNDLED \ +%endif -G"Unix Makefiles" %arrow_cmake_build cd - @@ -280,7 +284,7 @@ Requires: %{name}%{so_version}-libs = %{version}-%{release} Requires: brotli-devel Requires: bzip2-devel Requires: curl-devel -%if %{use_gcs} +%if %{use_gcs} && !%{use_bundled_nlohmann_json} Requires: json-devel %endif Requires: libzstd-devel From 674e70891d1b3bc82b025d9c434d8ff1aa4c877e Mon Sep 17 00:00:00 2001 From: abandy Date: Sat, 6 Jul 2024 17:44:04 -0400 Subject: [PATCH 030/122] GH-43160: [Swift] Add Struct Array (#43161) ### Rationale for this change Adding StructArray for struct type impl ### What changes are included in this PR? 
Added StructArray impl * GitHub Issue: #43160 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowArray.swift | 108 +++++++++++++++++- .../Arrow/Sources/Arrow/ArrowCImporter.swift | 2 +- swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 12 +- swift/Arrow/Sources/Arrow/ArrowTable.swift | 2 +- swift/Arrow/Sources/Arrow/ChunkedArray.swift | 1 + .../Arrow/Tests/ArrowTests/CodableTests.swift | 4 +- 6 files changed, 109 insertions(+), 20 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift index 32b6ba1704511..b0f20ee06c2e4 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArray.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift @@ -21,7 +21,7 @@ public protocol ArrowArrayHolder { var type: ArrowType {get} var length: UInt {get} var nullCount: UInt {get} - var array: Any {get} + var array: AnyArray {get} var data: ArrowData {get} var getBufferData: () -> [Data] {get} var getBufferDataSizes: () -> [Int] {get} @@ -29,11 +29,11 @@ public protocol ArrowArrayHolder { } public class ArrowArrayHolderImpl: ArrowArrayHolder { - public let array: Any public let data: ArrowData public let type: ArrowType public let length: UInt public let nullCount: UInt + public let array: AnyArray public let getBufferData: () -> [Data] public let getBufferDataSizes: () -> [Int] public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn @@ -73,6 +73,50 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder { return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray(arrays))) } } + + public static func loadArray( // swiftlint:disable:this cyclomatic_complexity + _ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder { + switch arrowType.id { + case .int8: + return ArrowArrayHolderImpl(FixedArray(with)) + case .int16: + return ArrowArrayHolderImpl(FixedArray(with)) + case .int32: + return ArrowArrayHolderImpl(FixedArray(with)) + case .int64: + return ArrowArrayHolderImpl(FixedArray(with)) + case .uint8: + return ArrowArrayHolderImpl(FixedArray(with)) + case .uint16: + return ArrowArrayHolderImpl(FixedArray(with)) + case .uint32: + return ArrowArrayHolderImpl(FixedArray(with)) + case .uint64: + return ArrowArrayHolderImpl(FixedArray(with)) + case .double: + return ArrowArrayHolderImpl(FixedArray(with)) + case .float: + return ArrowArrayHolderImpl(FixedArray(with)) + case .date32: + return ArrowArrayHolderImpl(Date32Array(with)) + case .date64: + return ArrowArrayHolderImpl(Date64Array(with)) + case .time32: + return ArrowArrayHolderImpl(Time32Array(with)) + case .time64: + return ArrowArrayHolderImpl(Time64Array(with)) + case .string: + return ArrowArrayHolderImpl(StringArray(with)) + case .boolean: + return ArrowArrayHolderImpl(BoolArray(with)) + case .binary: + return ArrowArrayHolderImpl(BinaryArray(with)) + case .strct: + return ArrowArrayHolderImpl(StructArray(with)) + default: + throw ArrowError.invalid("Array not found for type: \(arrowType)") + } + } } public class ArrowArray: AsString, AnyArray { @@ -221,10 +265,7 @@ public class BinaryArray: ArrowArray { } public override func asString(_ index: UInt) -> String { - if self[index] == nil { - return "" - } - + if self[index] == nil {return ""} let data = self[index]! if options.printAsHex { return data.hexEncodedString() @@ -233,3 +274,58 @@ public class BinaryArray: ArrowArray { } } } + +public class StructArray: ArrowArray<[Any?]> { + public private(set) var arrowFields: [ArrowArrayHolder]? 
+ public required init(_ arrowData: ArrowData) { + super.init(arrowData) + } + + public func initialize() throws -> StructArray { + var fields = [ArrowArrayHolder]() + for child in arrowData.children { + fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child)) + } + + self.arrowFields = fields + return self + } + + public override subscript(_ index: UInt) -> [Any?]? { + if self.arrowData.isNull(index) { + return nil + } + + if let fields = arrowFields { + var result = [Any?]() + for field in fields { + result.append(field.array.asAny(index)) + } + + return result + } + + return nil + } + + public override func asString(_ index: UInt) -> String { + if self.arrowData.isNull(index) { + return "" + } + + var output = "{" + if let fields = arrowFields { + for fieldIndex in 0.. [Data] {self.holder.getBufferData} public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes} diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift index 7e684f360ac51..35dd4dcd1e899 100644 --- a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -96,11 +96,7 @@ public class ArrowDecoder: Decoder { throw ArrowError.invalid("Column for key \"\(name)\" not found") } - guard let anyArray = col.array as? AnyArray else { - throw ArrowError.invalid("Unable to convert array to AnyArray") - } - - return anyArray + return col.array } func getCol(_ index: Int) throws -> AnyArray { @@ -108,11 +104,7 @@ public class ArrowDecoder: Decoder { throw ArrowError.outOfBounds(index: Int64(index)) } - guard let anyArray = self.columns[index].array as? AnyArray else { - throw ArrowError.invalid("Unable to convert array to AnyArray") - } - - return anyArray + return self.columns[index].array } func doDecode(_ key: CodingKey) throws -> T? { diff --git a/swift/Arrow/Sources/Arrow/ArrowTable.swift b/swift/Arrow/Sources/Arrow/ArrowTable.swift index b9d15154c4f94..dedf90f791cce 100644 --- a/swift/Arrow/Sources/Arrow/ArrowTable.swift +++ b/swift/Arrow/Sources/Arrow/ArrowTable.swift @@ -185,7 +185,7 @@ public class RecordBatch { public func anyData(for columnIndex: Int) -> AnyArray { let arrayHolder = column(columnIndex) - return (arrayHolder.array as! AnyArray) // swiftlint:disable:this force_cast + return arrayHolder.array } public func column(_ index: Int) -> ArrowArrayHolder { diff --git a/swift/Arrow/Sources/Arrow/ChunkedArray.swift b/swift/Arrow/Sources/Arrow/ChunkedArray.swift index c5ccfe4aec0e6..fb5734f64b6ba 100644 --- a/swift/Arrow/Sources/Arrow/ChunkedArray.swift +++ b/swift/Arrow/Sources/Arrow/ChunkedArray.swift @@ -18,6 +18,7 @@ import Foundation public protocol AnyArray { + var arrowData: ArrowData {get} func asAny(_ index: UInt) -> Any? var length: UInt {get} } diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift index a0c4e111e4360..b8f389a5e0089 100644 --- a/swift/Arrow/Tests/ArrowTests/CodableTests.swift +++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift @@ -227,7 +227,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_lengt } func getArrayValue(_ rb: RecordBatch, colIndex: Int, rowIndex: UInt) -> T? { - let anyArray = rb.columns[colIndex].array as! AnyArray // swiftlint:disable:this force_cast + let anyArray = rb.columns[colIndex].array return anyArray.asAny(UInt(rowIndex)) as? 
T } @@ -324,7 +324,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_lengt XCTAssertEqual(rb.columns[0].type.id, ArrowTypeId.int32) for index in 0..<100 { if index == 10 { - let anyArray = rb.columns[0].array as! AnyArray // swiftlint:disable:this force_cast + let anyArray = rb.columns[0].array XCTAssertNil(anyArray.asAny(UInt(index))) } else { XCTAssertEqual(getArrayValue(rb, colIndex: 0, rowIndex: UInt(index)), Int32(index)) From cad13bf8a65eaf13ec9feb78447d9b2f14c63965 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 7 Jul 2024 11:57:29 -0500 Subject: [PATCH 031/122] GH-43153: [R] pull on a grouped query returns the wrong column (#43172) ### Rationale for this change Fix a bug in our implementation of `pull` on grouped datasets. ### What changes are included in this PR? An additional test, and the fix. ### Are these changes tested? Yes, with the test added to test-dplyr-query.R. ### Are there any user-facing changes? Users will now get the expected behavior when using `pull` on grouped queries. **This PR contains a "Critical Fix".** * GitHub Issue: #43153 Lead-authored-by: Jonathan Keane Co-authored-by: Neal Richardson Signed-off-by: Jonathan Keane --- r/R/dplyr-collect.R | 2 +- r/tests/testthat/test-dplyr-query.R | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index c3232c6ff7230..08555cd9f3e6a 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -64,7 +64,7 @@ pull.Dataset <- function(.data, .data <- as_adq(.data) var <- vars_pull(names(.data), !!enquo(var)) .data$selected_columns <- set_names(.data$selected_columns[var], var) - out <- dplyr::compute(.data)[[1]] + out <- dplyr::compute(.data)[[var]] handle_pull_as_vector(out, as_vector) } pull.RecordBatchReader <- pull.arrow_dplyr_query <- pull.Dataset diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index bab81a463e9ee..7c75a84234bfc 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -87,6 +87,7 @@ test_that("pull", { .input %>% filter(int > 4) %>% rename(strng = chr) %>% + group_by(dbl) %>% pull(strng) %>% as.vector(), tbl From ca49843b8a8f34d5fdc06a4539ec3a2ad276df5d Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 7 Jul 2024 11:58:04 -0500 Subject: [PATCH 032/122] GH-43044: [R] So-called non-API entry points (#43173) ### Rationale for this change CRAN: its checks flag the use of so-called non-API entry points. ### What changes are included in this PR? Remove the so-called non-API calls ### Are these changes tested? With tests
from Defn.h) -#define UTF8_MASK (1 << 3) -#define ASCII_MASK (1 << 6) - -#define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) -#define IS_UTF8(x) (LEVELS(x) & UTF8_MASK) - // For context, see: // https://github.com/r-devel/r-svn/blob/6418faeb6f5d87d3d9b92b8978773bc3856b4b6f/src/main/altrep.c#L37 #define ALTREP_CLASS_SERIALIZED_CLASS(x) ATTRIB(x) @@ -133,19 +125,11 @@ class complexs { // functions that need to be called from an unwind_protect() namespace unsafe { -inline const char* utf8_string(SEXP s) { - if (!IS_UTF8(s) && !IS_ASCII(s)) { - return Rf_translateCharUTF8(s); - } else { - return CHAR(s); - } -} +inline const char* utf8_string(SEXP s) { return Rf_translateCharUTF8(s); } inline R_xlen_t r_string_size(SEXP s) { if (s == NA_STRING) { return 0; - } else if (IS_ASCII(s) || IS_UTF8(s)) { - return XLENGTH(s); } else { return strlen(Rf_translateCharUTF8(s)); } @@ -164,7 +148,7 @@ inline SEXP utf8_strings(SEXP x) { for (R_xlen_t i = 0; i < n; i++, ++p_x) { SEXP s = *p_x; - if (s != NA_STRING && !IS_UTF8(s) && !IS_ASCII(s)) { + if (s != NA_STRING) { SET_STRING_ELT(x, i, Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8)); } } @@ -394,7 +378,7 @@ SEXP to_r6(const std::shared_ptr& ptr, const char* r6_class_name) { cpp11::external_pointer> xp(new std::shared_ptr(ptr)); SEXP r6_class = Rf_install(r6_class_name); - if (Rf_findVarInFrame3(arrow::r::ns::arrow, r6_class, FALSE) == R_UnboundValue) { + if (!R_existsVarInFrame(arrow::r::ns::arrow, r6_class)) { cpp11::stop("No arrow R6 class named '%s'", r6_class_name); } From 8e5c4e6acafb7ef6deeaff929de6a80d6815845d Mon Sep 17 00:00:00 2001 From: Patrick Aboyoun <6666529+aboyoun@users.noreply.github.com> Date: Sun, 7 Jul 2024 09:59:14 -0700 Subject: [PATCH 033/122] GH-43163: [R] Fix bindings in Math group generics (#43162) ### Rationale for this change When support was added for `cumsum` in the `Math` group generics it mistakenly mapped `signif`, `expm1`, `log1p`, `cospi`, `sinpi`, `tanpi`, `cosh`, `sinh`, `tanh`, `acosh`, `asinh`, `atanh`, `lgamma`, `gamma`, `digamma`, and `trigamma` to the `cumulative_sum_checked` arrow function. This PR corrects that mistake and well as adds support for `log2`, `log1p`, `cumprod`, `cummax`, and `cummin`. ### What changes are included in this PR? It contains the following changes: 1. `acos`, `asin`, `cos`, `sin`, `tan` now map to the `*_checked` arrow function variants 2. `log2` maps to the `log2_checked` arrow function 3. `log1p` maps to the `log1p_checked` arrow function 4. `cumprod` maps to the `cumulative_prod_checked` arrow function 5. `cummax` maps to the `cumulative_max` arrow function 6. `cummin` maps to the `cumulative_min` arrow function 7. `signif`, `expm1`, `cospi`, `sinpi`, `tanpi`, `cosh`, `sinh`, `tanh`, `acosh`, `asinh`, `atanh`, `lgamma`, `gamma`, `digamma`, and `trigamma` properly throw an unsupported operation error ### Are these changes tested? Yes, tests were added to "Math group generics work on Array objects" in `arrow/r/tests/testthat/test-compute-arith.R` ### Are there any user-facing changes? 
No * GitHub Issue: #43163 Authored-by: Patrick Aboyoun Signed-off-by: Jonathan Keane --- r/R/arrow-datum.R | 31 +++++++-------- r/tests/testthat/test-compute-arith.R | 57 +++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 19 deletions(-) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 4770b03b9ca48..ba513ef470cfb 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -115,19 +115,19 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { switch(.Generic, abs = eval_array_expression("abs_checked", x), ceiling = eval_array_expression("ceil", x), - sign = , - floor = , - trunc = , - acos = , - asin = , - atan = , - cos = , - sin = , - tan = { - eval_array_expression(.Generic, x) - }, + sign = eval_array_expression("sign", x), + floor = eval_array_expression("floor", x), + trunc = eval_array_expression("trunc", x), + acos = eval_array_expression("acos_checked", x), + asin = eval_array_expression("asin_checked", x), + atan = eval_array_expression("atan", x), + cos = eval_array_expression("cos_checked", x), + sin = eval_array_expression("sin_checked", x), + tan = eval_array_expression("tan_checked", x), log = eval_array_expression("logb_checked", x, base), log10 = eval_array_expression("log10_checked", x), + log2 = eval_array_expression("log2_checked", x), + log1p = eval_array_expression("log1p_checked", x), round = eval_array_expression( "round", x, @@ -135,9 +135,12 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { ), sqrt = eval_array_expression("sqrt_checked", x), exp = eval_array_expression("power_checked", exp(1), x), + cumsum = eval_array_expression("cumulative_sum_checked", x), + cumprod = eval_array_expression("cumulative_prod_checked", x), + cummax = eval_array_expression("cumulative_max", x), + cummin = eval_array_expression("cumulative_min", x), signif = , expm1 = , - log1p = , cospi = , sinpi = , tanpi = , @@ -151,10 +154,6 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { gamma = , digamma = , trigamma = , - cumsum = eval_array_expression("cumulative_sum_checked", x), - cumprod = , - cummax = , - cummin = , stop(paste0("Unsupported operation on `", class(x)[1L], "` : "), .Generic, call. 
= FALSE) ) } diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index 5cffafe41e668..bbdcb10a6b1c2 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -162,6 +162,8 @@ test_that("Math group generics work on Array objects", { Array$create(log(c(0.6, 2.1), base = 2)) ) expect_equal(log10(Array$create(c(0.6, 2.1))), Array$create(log10(c(0.6, 2.1)))) + expect_equal(log2(Array$create(c(0.6, 2.1))), Array$create(log2(c(0.6, 2.1)))) + expect_equal(log1p(Array$create(c(0.6, 2.1, 0))), Array$create(log1p(c(0.6, 2.1, 0)))) expect_equal(round(Array$create(c(0.6, 2.1))), Array$create(c(1, 2))) expect_equal( @@ -175,6 +177,7 @@ test_that("Math group generics work on Array objects", { round(exp(Array$create(c(2L, 1L))), digits = 10), Array$create(round(exp(c(2L, 1L)), 10)) ) + expect_as_vector( cumsum(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), c(2.3, 1.3, 9.2, NA_real_, NA_real_) @@ -186,8 +189,56 @@ test_that("Math group generics work on Array objects", { c(2L, 9L, 17L, 16L, 18L, 35L, NA_integer_, NA_integer_, NA_integer_) ) - expect_error( - cumprod(Array$create(c(4L, 1L))), - "Unsupported operation on `Array`" + expect_as_vector( + cumprod(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, -2.3, -18.17, NA_real_, NA_real_) + ) + expect_equal(cumprod(Array$create(-10L)), Array$create(-10L)) + expect_equal(cumprod(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cumprod(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 14L, 112L, -112L, -224L, -3808L, NA_integer_, NA_integer_, NA_integer_) + ) + + expect_as_vector( + cummax(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, 2.3, 7.9, NA_real_, NA_real_) ) + expect_equal(cummax(Array$create(-10L)), Array$create(-10L)) + expect_equal(cummax(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cummax(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 7L, 8L, 8L, 8L, 17L, NA_integer_, NA_integer_, NA_integer_) + ) + + expect_as_vector( + cummin(Array$create(c(2.3, -1.0, 7.9, NA_real_, 1.0))), + c(2.3, -1, -1, NA_real_, NA_real_) + ) + expect_equal(cummin(Array$create(-10L)), Array$create(-10L)) + expect_equal(cummin(Array$create(NA_integer_)), Array$create(NA_integer_)) + expect_as_vector( + cummin(ChunkedArray$create(c(2L, 7L, 8L), c(-1L, 2L, 17L, NA_integer_, 3L), 18L)), + c(2L, 2L, 2L, -1L, -1L, -1L, NA_integer_, NA_integer_, NA_integer_) + ) + + expect_error(signif(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(expm1(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(cospi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(sinpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(tanpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(cosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(sinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(tanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(acosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(asinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + expect_error(atanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + + expect_error(lgamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") + 
expect_error(gamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(digamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(trigamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") }) From 38d37b46d1b9bd49c134f5a56f9fb1e9ddca7b3d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 8 Jul 2024 06:28:30 +0900 Subject: [PATCH 034/122] GH-43075: [CI][Crossbow][Docker] Set timeout for docker-tests (#43078) ### Rationale for this change If we don't have a timeout and a test gets stuck, the job takes 6 hours. ### What changes are included in this PR? Set a timeout. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #43075 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/tasks/docker-tests/github.cuda.yml | 3 ++- dev/tasks/docker-tests/github.linux.yml | 1 + dev/tasks/tasks.yml | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml index b1b21c3e66319..30879042924c4 100644 --- a/dev/tasks/docker-tests/github.cuda.yml +++ b/dev/tasks/docker-tests/github.cuda.yml @@ -23,8 +23,9 @@ jobs: test: name: | Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} - runs-on: ['self-hosted', 'cuda'] + runs-on: ['self-hosted', 'cuda'] {{ macros.github_set_env(env) }} + timeout-minutes: {{ timeout|default(60) }} steps: {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} # python 3.8 is installed on the runner, no need to install diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index 13e00abc70a84..697960360cfdc 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -25,6 +25,7 @@ jobs: Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} runs-on: ubuntu-latest {{ macros.github_set_env(env) }} + timeout-minutes: {{ timeout|default(60) }} steps: {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} {{ macros.github_free_space()|indent }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 7a86fd3e3e75f..45417acf856b5 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1280,6 +1280,7 @@ tasks: env: ARROW_R_DEV: "TRUE" image: ubuntu-r-valgrind + timeout: 300 # 5 hours test-r-linux-rchk: ci: github @@ -1564,10 +1565,11 @@ tasks: TEST_PYARROW_ONLY: "{{ test_pyarrow_only }}" NUMPY: "{{ numpy_version }}" JDK: "{{ jdk_version }}" + fetch_depth: 0 # use the branch-3.0 of spark, so prevent reusing any layers flags: --no-leaf-cache image: conda-python-spark - fetch_depth: 0 + timeout: 90 {% endfor %} {% for kind in ["static", "static-system-dependency"] %} From e8a795b812e2687eac41569793434ac14db83f3c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 8 Jul 2024 17:52:58 +0900 Subject: [PATCH 035/122] GH-42149: [C++] Use FetchContent for bundled ORC (#43011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change This change also carries a workaround for https://issues.apache.org/jira/browse/ORC-1732. ### What changes are included in this PR? ORC 2.0.1 has a dependency detection problem: we can't override the detection with ExternalProject, but we can override it with FetchContent. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes.
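To make that distinction concrete: a FetchContent subproject is configured inside the parent CMake process, so cache variables and `CMAKE_MODULE_PATH` entries set by the parent are visible to it, while an ExternalProject is configured by a separate cmake invocation at build time and only sees what is forwarded through `CMAKE_ARGS`. Below is a minimal sketch of the override pattern, mirroring the shape of the change that follows; the URL is a placeholder, since the real build takes `ORC_SOURCE_URL` and a checksum from cpp/thirdparty/versions.txt:

```cmake
include(FetchContent)

# Placeholder URL for the sketch; not the real source location.
fetchcontent_declare(orc URL "https://example.com/orc-2.0.1.tar.gz")

# ORC is configured in *this* CMake process, so forced cache entries override
# whatever its own dependency detection would otherwise find:
set(ORC_PREFER_STATIC_ZLIB OFF CACHE BOOL "" FORCE)
set(ZLIB_LIBRARY ZLIB::ZLIB CACHE STRING "" FORCE)

# The parent can even shadow the subproject's CMake find modules:
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules")

fetchcontent_makeavailable(orc)
```

With `externalproject_add()`, by contrast, none of these settings would reach ORC's configure step unless they were spelled out explicitly in `CMAKE_ARGS`, which is why the detection could not be overridden before.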
* GitHub Issue: #42149 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/CMakeLists.txt | 8 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 279 ++++++++++++------ cpp/thirdparty/versions.txt | 4 +- .../linux-packages/apache-arrow/debian/rules | 1 + 4 files changed, 201 insertions(+), 91 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 679842c31e0b1..2e2a4971840a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,14 @@ if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() +# https://cmake.org/cmake/help/latest/policy/CMP0170.html +# +# CMP0170 is for enforcing dependency populations by users with +# FETCHCONTENT_FULLY_DISCONNECTED=ON. +if(POLICY CMP0170) + cmake_policy(SET CMP0170 NEW) +endif() + set(ARROW_VERSION "17.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 7dab0a362ff24..8cb3ec83f57db 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2532,6 +2532,7 @@ macro(build_zlib) set_property(TARGET ZLIB::ZLIB PROPERTY IMPORTED_LOCATION "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a") + target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") @@ -4490,116 +4491,216 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") # ---------------------------------------------------------------------- # Apache ORC -macro(build_orc) +function(build_orc) message(STATUS "Building Apache ORC from source") - set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") - set(ORC_HOME "${ORC_PREFIX}") - set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") - set(ORC_STATIC_LIB - "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}") + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) + fetchcontent_declare(orc + ${FC_DECLARE_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + prepare_fetchcontent() + + set(CMAKE_UNITY_BUILD FALSE) + + set(ORC_PREFER_STATIC_LZ4 + OFF + CACHE BOOL "" FORCE) + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) + set(LZ4_HOME + ${LZ4_ROOT} + CACHE STRING "" FORCE) + set(LZ4_LIBRARY + LZ4::lz4 + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_PROTOBUF + OFF + CACHE BOOL "" FORCE) + get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) + set(PROTOBUF_HOME + ${Protobuf_ROOT} + CACHE STRING "" FORCE) + # ORC uses this. 
+ target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} + INTERFACE "${PROTOBUF_INCLUDE_DIR}") + set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) + set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) + set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) + + set(ORC_PREFER_STATIC_SNAPPY + OFF + CACHE BOOL "" FORCE) + get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) + set(SNAPPY_HOME + ${Snappy_ROOT} + CACHE STRING "" FORCE) + set(SNAPPY_LIBRARY + ${Snappy_TARGET} + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZLIB + OFF + CACHE BOOL "" FORCE) + get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) + set(ZLIB_HOME + ${ZLIB_ROOT} + CACHE STRING "" FORCE) + set(ZLIB_LIBRARY + ZLIB::ZLIB + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZSTD + OFF + CACHE BOOL "" FORCE) + get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) + set(ZSTD_HOME + ${ZSTD_ROOT} + CACHE STRING "" FORCE) + set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) + + set(BUILD_CPP_TESTS + OFF + CACHE BOOL "" FORCE) + set(BUILD_JAVA + OFF + CACHE BOOL "" FORCE) + set(BUILD_LIBHDFSPP + OFF + CACHE BOOL "" FORCE) + set(BUILD_TOOLS + OFF + CACHE BOOL "" FORCE) + set(INSTALL_VENDORED_LIBS + OFF + CACHE BOOL "" FORCE) + set(STOP_BUILD_ON_WARNING + OFF + CACHE BOOL "" FORCE) + + # We can remove this with ORC 2.0.2 or later. + list(PREPEND CMAKE_MODULE_PATH + ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) + + fetchcontent_makeavailable(orc) + + add_library(orc::orc INTERFACE IMPORTED) + target_link_libraries(orc::orc INTERFACE orc) + target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" + "${orc_SOURCE_DIR}/c++/include") + + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) + else() + set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") + set(ORC_HOME "${ORC_PREFIX}") + set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") + set(ORC_STATIC_LIB + "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) - get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) + get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) - get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) + get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) - get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) + get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) - get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" 
DIRECTORY) - set(ORC_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" - -DSTOP_BUILD_ON_WARNING=OFF - -DBUILD_LIBHDFSPP=OFF - -DBUILD_JAVA=OFF - -DBUILD_TOOLS=OFF - -DBUILD_CPP_TESTS=OFF - -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" - "-DPROTOBUF_EXECUTABLE=$" - "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" - "-DPROTOBUF_INCLUDE_DIR=$" - "-DPROTOBUF_LIBRARY=$" - "-DPROTOC_LIBRARY=$" - "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" - "-DSNAPPY_LIBRARY=$" - "-DLZ4_LIBRARY=$" - "-DLZ4_STATIC_LIB=$" - "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" - "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" - "-DZSTD_HOME=${ORC_ZSTD_ROOT}" - "-DZSTD_INCLUDE_DIR=$" - "-DZSTD_LIBRARY=$") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") - endif() + set(ORC_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" + -DSTOP_BUILD_ON_WARNING=OFF + -DBUILD_LIBHDFSPP=OFF + -DBUILD_JAVA=OFF + -DBUILD_TOOLS=OFF + -DBUILD_CPP_TESTS=OFF + -DINSTALL_VENDORED_LIBS=OFF + "-DLZ4_HOME=${ORC_LZ4_ROOT}" + "-DPROTOBUF_EXECUTABLE=$" + "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" + "-DPROTOBUF_INCLUDE_DIR=$" + "-DPROTOBUF_LIBRARY=$" + "-DPROTOC_LIBRARY=$" + "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DSNAPPY_LIBRARY=$" + "-DLZ4_LIBRARY=$" + "-DLZ4_STATIC_LIB=$" + "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" + "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" + "-DZSTD_HOME=${ORC_ZSTD_ROOT}" + "-DZSTD_INCLUDE_DIR=$" + "-DZSTD_LIBRARY=$") + if(ZLIB_ROOT) + set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") + endif() - # Work around CMake bug - file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) + # Work around CMake bug + file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) - externalproject_add(orc_ep - ${EP_COMMON_OPTIONS} - URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS ${ORC_STATIC_LIB} - CMAKE_ARGS ${ORC_CMAKE_ARGS} - DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} - ${ARROW_PROTOBUF_PROTOC} - ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET} - LZ4::lz4 - ZLIB::ZLIB) - - set(ORC_VENDORED 1) - - add_library(orc::orc STATIC IMPORTED) - set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") - target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") - target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET}) - # Protobuf generated files may use ABSL_DCHECK*() and - # absl::log_internal_check_op is needed for them. 
- if(TARGET absl::log_internal_check_op) - target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) - endif() - if(NOT MSVC) - if(NOT APPLE AND ARROW_ENABLE_THREADING) - target_link_libraries(orc::orc INTERFACE Threads::Threads) - endif() - target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) - endif() - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9") - target_link_libraries(orc::orc INTERFACE stdc++fs) + externalproject_add(orc_ep + ${EP_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS ${ORC_STATIC_LIB} + CMAKE_ARGS ${ORC_CMAKE_ARGS} + DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_PROTOBUF_PROTOC} + ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET} + LZ4::lz4 + ZLIB::ZLIB) + add_library(orc::orc STATIC IMPORTED) + set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") + target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") + target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET}) + # Protobuf generated files may use ABSL_DCHECK*() and + # absl::log_internal_check_op is needed for them. + if(TARGET absl::log_internal_check_op) + target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8") - target_link_libraries(orc::orc INTERFACE c++fs) + if(NOT MSVC) + if(NOT APPLE AND ARROW_ENABLE_THREADING) + target_link_libraries(orc::orc INTERFACE Threads::Threads) + endif() + target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) endif() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) + add_dependencies(orc::orc orc_ep) + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) endif() - add_dependencies(orc::orc orc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) -endmacro() + set(ORC_VENDORED + TRUE + PARENT_SCOPE) + set(ARROW_BUNDLED_STATIC_LIBS + ${ARROW_BUNDLED_STATIC_LIBS} + PARENT_SCOPE) +endfunction() if(ARROW_ORC) resolve_dependency(orc HAVE_ALT TRUE) - target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) if(ORC_VENDORED) set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION}) else() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) set(ARROW_ORC_VERSION ${orcAlt_VERSION}) + message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") + message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() - message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") - message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() # ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 986ac056b61a6..ab988badec145 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.0 -ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df +ARROW_ORC_BUILD_VERSION=2.0.1 +ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d ARROW_PROTOBUF_BUILD_VERSION=v21.3 
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 83bcad98a7a6e..6c3074ab234e1 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -51,6 +51,7 @@ override_dh_auto_configure: -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCUDAToolkit_ROOT=/usr \ + -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON From 5c64f74cf2840b37893d8158475676c290fa6615 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Mon, 8 Jul 2024 14:02:25 -0400 Subject: [PATCH 036/122] MINOR: Remove inactive contributors (#43177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes are included in this PR? This list has a hardcoded limit of 10 entries. Audit the list for inactivity to avoid prematurely hitting the limit. ### Are these changes tested? n/a ### Are there any user-facing changes? No Authored-by: Dane Pitkin Signed-off-by: Raúl Cumplido --- .asf.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.asf.yaml b/.asf.yaml index 28d280e477106..12438081cfc57 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -21,7 +21,6 @@ github: collaborators: - anjakefala - benibus - - davisusanibar - jbonofre - js8544 - laurentgo From dfba99fdb58920da64c69cfa75a71eb76998756e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:35:15 -0400 Subject: [PATCH 037/122] MINOR: [Go] Bump golang.org/x/sys from 0.21.0 to 0.22.0 in /go (#43180) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.21.0 to 0.22.0.
Commits:
- faed7ec unix: add PthreadChdir and PthreadFchdir on darwin
- c892bb7 unix: fix MmapPtr test failing on OpenBSD
- a0ef40a unix: fix MremapPtr test failing on NetBSD
- daa2394 unix: add unsafe mmap, munmap, mremap
- 7670087 windows: add GetAce Windows API
- 348425a windows/svc: do not pass theService to windows.RegisterServiceCtrlHandlerEx
- See full diff in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/sys&package-manager=go_modules&previous-version=0.21.0&new-version=0.22.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 7819ab146d80d..0e8e48d04089f 100644 --- a/go/go.mod +++ b/go/go.mod @@ -36,7 +36,7 @@ require ( github.com/zeebo/xxh3 v1.0.2 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 - golang.org/x/sys v0.21.0 + golang.org/x/sys v0.22.0 golang.org/x/tools v0.22.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 diff --git a/go/go.sum b/go/go.sum index 581930a3909cd..c7ff6bd5f6666 100644 --- a/go/go.sum +++ b/go/go.sum @@ -126,8 +126,8 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= From 5c64b6eda44a1b5d37fa2206880fca920fbef362 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 8 Jul 2024 16:36:27 -0400 Subject: [PATCH 038/122] MINOR: [R] Add news bullet for mutate() enhancement in 17.0 (#43189) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change I noticed this was missing from the NEWS, and it's kinda important. ### What changes are included in this PR? A news bullet ### Are these changes tested? Nope! ### Are there any user-facing changes? 📰 --- r/NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/r/NEWS.md b/r/NEWS.md index 317e546a1b70f..1e8a480ef5f65 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -20,6 +20,7 @@ # arrow 16.1.0.9000 * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) +* `mutate()` expressions can now include aggregations, such as `x - mean(x)`. (#41350) * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. * The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) * R metadata, stored in the Arrow schema to support round-tripping data between R and Arrow/Parquet, is now serialized and deserialized more strictly. This makes it safer to load data from files from unknown sources into R data.frames. 
(#41969) From 7fcd8be2b8d2ea3e6759edf91b576e21647f5e24 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:54:50 -0400 Subject: [PATCH 039/122] MINOR: [Go] Bump golang.org/x/tools from 0.22.0 to 0.23.0 in /go (#43181) Bumps [golang.org/x/tools](https://github.com/golang/tools) from 0.22.0 to 0.23.0.
Commits:
- 33be3ef go.mod: update golang.org/x dependencies
- 0e7ccc0 gopls/internal/golang: provide version info for stdlib in pkgdoc
- fcf5463 gopls/internal/server: add counters to inform v0.17.0
- 70a59b2 gopls/doc: respond to Hana's review of CL 583316
- 799a471 gopls/doc: document all of gopls' features
- 2e239ad gopls/internal/golang: provide version info for stdlib fields
- bc15dd8 gopls/internal/analysis/fillstruct: use package name (not path) in UI
- 72edac2 internal/typeparams: fix crash in interface hover with empty type set
- c0ae6bb gopls/internal/golang: splitlines: s/parameter/arguments/ in CallExpr
- 5cc2d0b gopls/internal/golang: splitlines: remove workaround for golang/go#68202
- Additional commits viewable in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/tools&package-manager=go_modules&previous-version=0.22.0&new-version=0.23.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go/go.mod | 6 +++--- go/go.sum | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/go/go.mod b/go/go.mod index 0e8e48d04089f..a5e359741c26c 100644 --- a/go/go.mod +++ b/go/go.mod @@ -37,7 +37,7 @@ require ( golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 golang.org/x/sys v0.22.0 - golang.org/x/tools v0.22.0 + golang.org/x/tools v0.23.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 @@ -75,8 +75,8 @@ require ( github.com/tidwall/gjson v1.14.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect - golang.org/x/mod v0.18.0 // indirect - golang.org/x/net v0.26.0 // indirect + golang.org/x/mod v0.19.0 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/text v0.16.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go/go.sum b/go/go.sum index c7ff6bd5f6666..6ce51c83350a0 100644 --- a/go/go.sum +++ b/go/go.sum @@ -113,14 +113,14 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= -golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= +golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= -golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= -golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= +golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -130,8 +130,8 @@ golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= -golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg= 
+golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= From d25ec6a860ca9f41d88bef2ab7432db7b59bbe1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 07:54:24 +0900 Subject: [PATCH 040/122] MINOR: [Java] Bump com.fasterxml.jackson:jackson-bom from 2.17.1 to 2.17.2 in /java (#43179) Bumps [com.fasterxml.jackson:jackson-bom](https://github.com/FasterXML/jackson-bom) from 2.17.1 to 2.17.2.
Commits:
- e239d65 [maven-release-plugin] prepare release jackson-bom-2.17.2
- 154dbc5 Prepare for 2.17.2 release
- 017f7ae Update CI
- fec40e1 Back to snapshot dep
- 14f8741 [maven-release-plugin] prepare for next development iteration
- See full diff in compare view
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.fasterxml.jackson:jackson-bom&package-manager=maven&previous-version=2.17.1&new-version=2.17.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index b047aa5d78374..0756128ee31b8 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -94,7 +94,7 @@ under the License. 4.1.110.Final 1.63.0 3.25.1 - 2.17.1 + 2.17.2 3.4.0 24.3.25 1.11.3 From 8fc40fc02116482603c28edbba496305a0b8b0e6 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Tue, 9 Jul 2024 00:17:31 -0400 Subject: [PATCH 041/122] MINOR: [Java] Update issue management config in pom.xml (#43191) ### Rationale for this change Update the issue management config in Java's parent pom. ### What changes are included in this PR? * Change issue management config from Jira to GitHub ### Are these changes tested? n/a Authored-by: Dane Pitkin Signed-off-by: Sutou Kouhei --- .github/CONTRIBUTING.md | 8 ++++---- java/pom.xml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 37a1be7d2c00b..beb126eaf9496 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -40,7 +40,7 @@ We prefer to receive contributions in the form of GitHub pull requests. Please send pull requests against the [github.com/apache/arrow][4] repository following the procedure below. -If you are looking for some ideas on what to contribute, check out the [JIRA +If you are looking for some ideas on what to contribute, check out the [GitHub issues][3] for the Apache Arrow project. Comment on the issue and/or contact [dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org) with your questions and ideas. @@ -53,8 +53,8 @@ To contribute a patch: 1. Break your work into small, single-purpose patches if possible. It’s much harder to merge in a large change with a lot of disjoint features. -2. If one doesn't already exist, create a JIRA for your patch on the [Arrow Project -JIRA](https://issues.apache.org/jira/browse/ARROW). +2. If one doesn't already exist, create a GitHub issue for your patch on the [Arrow Project +GitHub](https://github.com/apache/arrow/issues). 3. Submit the patch as a GitHub pull request against the main branch. For a tutorial, see the GitHub guides on [forking a repo](https://help.github.com/en/articles/fork-a-repo) and [sending a pull request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork). So that your pull request syncs with the JIRA issue, prefix your pull request @@ -68,5 +68,5 @@ Thank you in advance for your contributions! [1]: mailto:dev-subscribe@arrow.apache.org [2]: https://github.com/apache/arrow/tree/main/format -[3]: https://issues.apache.org/jira/browse/ARROW +[3]: https://github.com/apache/arrow/issues [4]: https://github.com/apache/arrow diff --git a/java/pom.xml b/java/pom.xml index 0756128ee31b8..2cc3efc29ff13 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -81,8 +81,8 @@ under the License. - Jira - https://issues.apache.org/jira/browse/arrow + GitHub + https://github.com/apache/arrow/issues From 0c4d6c7f775ad2b106e25d37fa4119a2dbf2a7f5 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Tue, 9 Jul 2024 11:23:28 -0400 Subject: [PATCH 042/122] GH-43199: [CI][Packaging] dev/release/utils-create-release-tarball.sh should not include the release candidate number in the name of the tarball's top-level directory. 
(#43200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change `dev/release/util-create-release-tarball.sh` should not include the release candidate number in the name of the tarball's top-level directory. If the release candidate number is included, the binaries and the release verification tasks fail because the tarball entries have an unexpected folder hierarchy. See https://github.com/apache/arrow/pull/43188#issuecomment-2215002552. ### What changes are included in this PR? 1. Modified `dev/release/util-create-release-tarball.sh` to not include the release candidate number in the name of the source directory from which the release tarball is created. ### Are these changes tested? Manually verified this change fixes the bug: ```bash $ dev/release/utils-create-release-tarball.sh 17.0.0 1 $ tar zxvf apache-arrow-17.0.0.tar.gz ... $ ls apache-arrow-17.0.0/ apache-arrow-17.0.0.tar.gz ``` ### Are there any user-facing changes? No * GitHub Issue: #43199 Authored-by: Sarah Gilmore Signed-off-by: Raúl Cumplido --- dev/release/02-source-test.rb | 8 ++++---- dev/release/utils-create-release-tarball.sh | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index eab95c798f284..3dec19326f92b 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -22,7 +22,7 @@ class SourceTest < Test::Unit::TestCase def setup @current_commit = git_current_commit detect_versions - @tag_name = "apache-arrow-#{@release_version}-rc0" + @tag_name_no_rc = "apache-arrow-#{@release_version}" @archive_name = "apache-arrow-#{@release_version}.tar.gz" @script = File.expand_path("dev/release/02-source.sh") @tarball_script = File.expand_path("dev/release/utils-create-release-tarball.sh") @@ -50,7 +50,7 @@ def source(*targets) def test_symbolic_links source - Dir.chdir(@tag_name) do + Dir.chdir(@tag_name_no_rc) do assert_equal([], Find.find(".").find_all {|path| File.symlink?(path)}) end @@ -58,7 +58,7 @@ def test_symbolic_links def test_csharp_git_commit_information source - Dir.chdir("#{@tag_name}/csharp") do + Dir.chdir("#{@tag_name_no_rc}/csharp") do FileUtils.mv("dummy.git", "../.git") sh("dotnet", "pack", "-c", "Release") FileUtils.mv("../.git", "dummy.git") @@ -83,7 +83,7 @@ def test_csharp_git_commit_information def test_python_version source - Dir.chdir("#{@tag_name}/python") do + Dir.chdir("#{@tag_name_no_rc}/python") do sh("python3", "setup.py", "sdist") if on_release_branch? 
pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" diff --git a/dev/release/utils-create-release-tarball.sh b/dev/release/utils-create-release-tarball.sh index 1a0ba83639b9a..0ca57ebe78c01 100755 --- a/dev/release/utils-create-release-tarball.sh +++ b/dev/release/utils-create-release-tarball.sh @@ -30,26 +30,27 @@ version=$1 rc=$2 tag=apache-arrow-${version}-rc${rc} +root_folder=apache-arrow-${version} tarball=apache-arrow-${version}.tar.gz : ${release_hash:=$(git rev-list --max-count=1 ${tag})} -rm -rf ${tag} +rm -rf ${root_folder} # be conservative and use the release hash, even though git produces the same # archive (identical hashes) using the scm tag (cd "${SOURCE_TOP_DIR}" && \ - git archive ${release_hash} --prefix ${tag}/) | \ + git archive ${release_hash} --prefix ${root_folder}/) | \ tar xf - # Resolve symbolic and hard links -rm -rf ${tag}.tmp -mv ${tag} ${tag}.tmp -cp -R -L ${tag}.tmp ${tag} -rm -rf ${tag}.tmp +rm -rf ${root_folder}.tmp +mv ${root_folder} ${root_folder}.tmp +cp -R -L ${root_folder}.tmp ${root_folder} +rm -rf ${root_folder}.tmp # Create a dummy .git/ directory to download the source files from GitHub with Source Link in C#. -dummy_git=${tag}/csharp/dummy.git +dummy_git=${root_folder}/csharp/dummy.git mkdir ${dummy_git} pushd ${dummy_git} echo ${release_hash} > HEAD @@ -58,5 +59,5 @@ mkdir objects refs popd # Create new tarball from modified source directory -tar czf ${tarball} ${tag} -rm -rf ${tag} +tar czf ${tarball} ${root_folder} +rm -rf ${root_folder} From 89fd5664b942f0cec1c51a4a17610aac3015d080 Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com> Date: Tue, 9 Jul 2024 13:28:16 -0400 Subject: [PATCH 043/122] GH-41640: [Go] Implement BYTE_STREAM_SPLIT Parquet Encoding (#43066) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change This encoding is defined by the [Parquet spec](https://github.com/apache/parquet-format/blob/master/Encodings.md#byte-stream-split-byte_stream_split--9) but does not currently have a Go implementation. ### What changes are included in this PR? Implement BYTE_STREAM_SPLIT encoder/decoder for: - FIXED_LEN_BYTE_ARRAY - FLOAT - DOUBLE - INT32 - INT64 ### Are these changes tested? Yes. See unit tests, file read conformance tests, and benchmarks. 
**Benchmark results on my machine** ``` ➜ go git:(impl-pq-bytestreamsplit) go test ./parquet/internal/encoding -run=^$ -bench=BenchmarkByteStreamSplit -benchmem goos: darwin goarch: arm64 pkg: github.com/apache/arrow/go/v17/parquet/internal/encoding BenchmarkByteStreamSplitEncodingInt32/len_1024-14 502117 2005 ns/op 2043.37 MB/s 5267 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_2048-14 328921 3718 ns/op 2203.54 MB/s 9879 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_4096-14 169642 7083 ns/op 2313.14 MB/s 18852 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_8192-14 82503 14094 ns/op 2324.99 MB/s 41425 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_16384-14 45006 26841 ns/op 2441.68 MB/s 74286 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_32768-14 23433 51233 ns/op 2558.33 MB/s 140093 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt32/len_65536-14 12019 99001 ns/op 2647.90 MB/s 271417 B/op 3 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_1024-14 996573 1199 ns/op 3417.00 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_2048-14 503200 2380 ns/op 3442.18 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_4096-14 252038 4748 ns/op 3450.90 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_8192-14 122419 9793 ns/op 3346.08 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_16384-14 63321 19040 ns/op 3442.00 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_32768-14 31051 38677 ns/op 3388.89 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32/len_65536-14 15792 77931 ns/op 3363.80 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_1024-14 981043 1221 ns/op 3354.53 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_2048-14 492319 2424 ns/op 3379.34 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_4096-14 248062 4850 ns/op 3378.20 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_8192-14 123064 9903 ns/op 3308.87 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_16384-14 61845 19567 ns/op 3349.29 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_32768-14 30568 39456 ns/op 3321.96 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt32Batched/len_65536-14 15172 78762 ns/op 3328.30 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_1024-14 319006 3690 ns/op 2220.13 MB/s 9880 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_2048-14 161006 7132 ns/op 2297.30 MB/s 18853 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_4096-14 85783 13925 ns/op 2353.12 MB/s 41421 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_8192-14 45015 26943 ns/op 2432.43 MB/s 74312 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_16384-14 20352 59259 ns/op 2211.84 MB/s 139940 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_32768-14 10000 111143 ns/op 2358.61 MB/s 271642 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingInt64/len_65536-14 5529 212652 ns/op 2465.47 MB/s 534805 B/op 3 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_1024-14 528987 2355 ns/op 3478.32 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_2048-14 262707 4701 ns/op 3485.08 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_4096-14 129212 9313 ns/op 3518.63 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_8192-14 53746 23315 ns/op 2810.90 MB/s 0 
B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_16384-14 28782 41054 ns/op 3192.65 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_32768-14 14803 80157 ns/op 3270.39 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingInt64/len_65536-14 7484 164111 ns/op 3194.72 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_1024-14 291716 4107 ns/op 997.43 MB/s 5276 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_2048-14 148888 7975 ns/op 1027.18 MB/s 9914 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_4096-14 76587 15677 ns/op 1045.11 MB/s 18955 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_8192-14 39758 30277 ns/op 1082.26 MB/s 41752 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_16384-14 20306 59506 ns/op 1101.33 MB/s 74937 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_32768-14 10000 116043 ns/op 1129.52 MB/s 141290 B/op 3 allocs/op BenchmarkByteStreamSplitEncodingFixedLenByteArray/len_65536-14 4770 236887 ns/op 1106.62 MB/s 277583 B/op 3 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_1024-14 601875 1723 ns/op 2376.70 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_2048-14 363206 3422 ns/op 2394.18 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_4096-14 173041 6906 ns/op 2372.45 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_8192-14 81810 14307 ns/op 2290.40 MB/s 0 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_16384-14 40518 29101 ns/op 2252.04 MB/s 1 B/op 0 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_32768-14 21338 56678 ns/op 2312.58 MB/s 6 B/op 1 allocs/op BenchmarkByteStreamSplitDecodingFixedLenByteArray/len_65536-14 10000 111433 ns/op 2352.49 MB/s 26 B/op 6 allocs/op PASS ok github.com/apache/arrow/go/v17/parquet/internal/encoding 69.109s ``` ### Are there any user-facing changes? New ByteStreamSplit encoding option available. Godoc updated to reflect this. 
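For readers unfamiliar with the format, BYTE_STREAM_SPLIT is a byte transposition: byte k of every value is written contiguously into stream k, which typically makes floating-point data far more compressible by a downstream codec. The standalone Go sketch below (not part of this patch; all names are illustrative) mirrors what the width-4 encode/decode paths in `byte_stream_split.go` do for `float32` values, assuming a little-endian byte layout:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// encode scatters byte k of each float32 into stream k; streams are laid
// out back to back, so the stride between streams is len(vals).
func encode(vals []float32) []byte {
	n := len(vals)
	out := make([]byte, 4*n)
	for i, v := range vals {
		var raw [4]byte
		binary.LittleEndian.PutUint32(raw[:], math.Float32bits(v))
		for k := 0; k < 4; k++ {
			out[k*n+i] = raw[k]
		}
	}
	return out
}

// decode gathers byte k of value i from offset k*stride+i, reversing encode.
func decode(data []byte) []float32 {
	n := len(data) / 4
	vals := make([]float32, n)
	for i := 0; i < n; i++ {
		var raw [4]byte
		for k := 0; k < 4; k++ {
			raw[k] = data[k*n+i]
		}
		vals[i] = math.Float32frombits(binary.LittleEndian.Uint32(raw[:]))
	}
	return vals
}

func main() {
	in := []float32{1.5, -2.25, 3.75}
	fmt.Println(decode(encode(in))) // prints [1.5 -2.25 3.75]
}
```

In the API added by this patch, the encoding is selected via `parquet.WithEncoding(parquet.Encodings.ByteStreamSplit)` with dictionary encoding disabled, as `TestBatchedByteStreamSplitFileRoundtrip` in the diff below shows.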
* GitHub Issue: #41640 Authored-by: Joel Lubinitsky Signed-off-by: Matt Topol --- go/parquet/doc.go | 14 +- go/parquet/file/column_reader.go | 5 +- go/parquet/file/column_writer_test.go | 9 + go/parquet/file/file_reader_test.go | 136 ++++++ go/parquet/file/file_writer_test.go | 76 ++++ .../internal/encoding/byte_stream_split.go | 389 ++++++++++++++++++ .../encoding/encoding_benchmarks_test.go | 170 ++++++++ go/parquet/internal/encoding/encoding_test.go | 11 + .../encoding/fixed_len_byte_array_decoder.go | 71 ++++ .../encoding/fixed_len_byte_array_encoder.go | 39 ++ .../internal/encoding/typed_encoder.gen.go | 20 + .../encoding/typed_encoder.gen.go.tmpl | 8 + go/parquet/types.go | 2 + 13 files changed, 945 insertions(+), 5 deletions(-) create mode 100644 go/parquet/internal/encoding/byte_stream_split.go diff --git a/go/parquet/doc.go b/go/parquet/doc.go index 6ab08f83f063f..c580b8e317a67 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -60,8 +60,18 @@ // # Encodings // // The encoding types supported in this package are: -// Plain, Plain/RLE Dictionary, Delta Binary Packed (only integer types), Delta Byte Array -// (only ByteArray), Delta Length Byte Array (only ByteArray) +// +// - Plain +// +// - Plain/RLE Dictionary +// +// - Delta Binary Packed (only integer types) +// +// - Delta Byte Array (only ByteArray) +// +// - Delta Length Byte Array (only ByteArray) +// +// - Byte Stream Split (Float, Double, Int32, Int64, FixedLenByteArray) // // Tip: Some platforms don't necessarily support all kinds of encodings. If you're not // sure what to use, just use Plain and Dictionary encoding. diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index e441cd3e9c2d2..74a1b4486a703 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -354,13 +354,12 @@ func (c *columnChunkReader) initDataDecoder(page Page, lvlByteLen int64) error { case format.Encoding_PLAIN, format.Encoding_DELTA_BYTE_ARRAY, format.Encoding_DELTA_LENGTH_BYTE_ARRAY, - format.Encoding_DELTA_BINARY_PACKED: + format.Encoding_DELTA_BINARY_PACKED, + format.Encoding_BYTE_STREAM_SPLIT: c.curDecoder = c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem) c.decoders[encoding] = c.curDecoder case format.Encoding_RLE_DICTIONARY: return errors.New("parquet: dictionary page must be before data page") - case format.Encoding_BYTE_STREAM_SPLIT: - return fmt.Errorf("parquet: unsupported data encoding %s", encoding) default: return fmt.Errorf("parquet: unknown encoding type %s", encoding) } diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index c8d61952064fe..cd2408f4fba5d 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -459,6 +459,15 @@ func (p *PrimitiveWriterTestSuite) TestRequiredPlain() { p.testRequiredWithEncoding(parquet.Encodings.Plain) } +func (p *PrimitiveWriterTestSuite) TestRequiredByteStreamSplit() { + switch p.Typ { + case reflect.TypeOf(float32(0)), reflect.TypeOf(float64(0)), reflect.TypeOf(int32(0)), reflect.TypeOf(int64(0)), reflect.TypeOf(parquet.FixedLenByteArray{}): + p.testRequiredWithEncoding(parquet.Encodings.ByteStreamSplit) + default: + p.Panics(func() { p.testRequiredWithEncoding(parquet.Encodings.ByteStreamSplit) }) + } +} + func (p *PrimitiveWriterTestSuite) TestRequiredDictionary() { p.testRequiredWithEncoding(parquet.Encodings.PlainDict) } diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index 
8056a837ea19e..d4faf26086f93 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -20,6 +20,7 @@ import ( "bytes" "crypto/rand" "encoding/binary" + "fmt" "io" "os" "path" @@ -446,3 +447,138 @@ func TestRleBooleanEncodingFileRead(t *testing.T) { assert.Equal(t, expected, values[:len(expected)]) } + +func TestByteStreamSplitEncodingFileRead(t *testing.T) { + dir := os.Getenv("PARQUET_TEST_DATA") + if dir == "" { + t.Skip("no path supplied with PARQUET_TEST_DATA") + } + require.DirExists(t, dir) + + props := parquet.NewReaderProperties(memory.DefaultAllocator) + fileReader, err := file.OpenParquetFile(path.Join(dir, "byte_stream_split_extended.gzip.parquet"), + false, file.WithReadProps(props)) + require.NoError(t, err) + defer fileReader.Close() + + nRows := 200 + nCols := 14 + require.Equal(t, 1, fileReader.NumRowGroups()) + rgr := fileReader.RowGroup(0) + require.EqualValues(t, nRows, rgr.NumRows()) + require.EqualValues(t, nCols, rgr.NumColumns()) + + // Helper to unpack values from column of a specific type + getValues := func(rdr file.ColumnChunkReader, typ parquet.Type) any { + var ( + vals any + total int64 + read int + err error + ) + + switch typ { + case parquet.Types.FixedLenByteArray: + r, ok := rdr.(*file.FixedLenByteArrayColumnChunkReader) + require.True(t, ok) + + values := make([]parquet.FixedLenByteArray, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Float: + r, ok := rdr.(*file.Float32ColumnChunkReader) + require.True(t, ok) + + values := make([]float32, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Double: + r, ok := rdr.(*file.Float64ColumnChunkReader) + require.True(t, ok) + + values := make([]float64, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Int32: + r, ok := rdr.(*file.Int32ColumnChunkReader) + require.True(t, ok) + + values := make([]int32, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + case parquet.Types.Int64: + r, ok := rdr.(*file.Int64ColumnChunkReader) + require.True(t, ok) + + values := make([]int64, nRows) + total, read, err = r.ReadBatch(int64(nRows), values, nil, nil) + vals = values + default: + t.Fatalf("unrecognized parquet type: %s", typ) + } + + require.NoError(t, err) + require.EqualValues(t, nRows, total) + require.EqualValues(t, nRows, read) + + return vals + } + + // Test conformance against Parquet reference + // Expected structure: https://github.com/apache/parquet-testing/blob/1bf4bd39df2135d132451c281754268f03dc1c0e/data/README.md?plain=1#L358 + for i, tc := range []struct { + PhysicalType parquet.Type + LogicalType schema.LogicalType + }{ + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.Float16LogicalType{}, + }, + { + PhysicalType: parquet.Types.Float, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Double, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Int32, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.Int64, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.NoLogicalType{}, + }, + { + PhysicalType: parquet.Types.FixedLenByteArray, + LogicalType: schema.NewDecimalLogicalType(7, 3), + }, + } { + t.Run(fmt.Sprintf("(Physical:%s/Logical:%s)", tc.PhysicalType, tc.LogicalType), func(t 
*testing.T) { + // Iterate through pairs of adjacent columns + colIdx := 2 * i + + // Read Plain-encoded column + rdrPlain, err := rgr.Column(colIdx) + require.NoError(t, err) + + // Read ByteStreamSplit-encoded column + rdrByteStreamSplit, err := rgr.Column(colIdx + 1) + require.NoError(t, err) + + // Logical types match + require.True(t, rdrPlain.Descriptor().LogicalType().Equals(tc.LogicalType)) + require.True(t, rdrByteStreamSplit.Descriptor().LogicalType().Equals(tc.LogicalType)) + + // Decoded values match + valuesPlain := getValues(rdrPlain, tc.PhysicalType) + valuesByteStreamSplit := getValues(rdrByteStreamSplit, tc.PhysicalType) + require.Equal(t, valuesPlain, valuesByteStreamSplit) + }) + } +} diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index a183022357d62..e5ad1b07e25de 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -464,3 +464,79 @@ func TestCloseError(t *testing.T) { writer := file.NewParquetWriter(sink, sc) assert.Error(t, writer.Close()) } + +func TestBatchedByteStreamSplitFileRoundtrip(t *testing.T) { + input := []parquet.FixedLenByteArray{ + {1, 2}, + {3, 4}, + {5, 6}, + {7, 8}, + } + + size := len(input) + chunk := size / 2 + + props := parquet.NewWriterProperties( + parquet.WithEncoding(parquet.Encodings.ByteStreamSplit), + parquet.WithDictionaryDefault(false), + parquet.WithBatchSize(int64(chunk)), + parquet.WithDataPageSize(int64(size)*2), + ) + + field, err := schema.NewPrimitiveNodeLogical("f16", parquet.Repetitions.Required, schema.Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, 1) + require.NoError(t, err) + + schema, err := schema.NewGroupNode("test", parquet.Repetitions.Required, schema.FieldList{field}, 0) + require.NoError(t, err) + + sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) + writer := file.NewParquetWriter(sink, schema, file.WithWriterProps(props)) + + rgw := writer.AppendRowGroup() + cw, err := rgw.NextColumn() + require.NoError(t, err) + + f16ColumnWriter, ok := cw.(*file.FixedLenByteArrayColumnChunkWriter) + require.True(t, ok) + + nVals, err := f16ColumnWriter.WriteBatch(input[:chunk], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, nVals) + + nVals, err = f16ColumnWriter.WriteBatch(input[chunk:], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, nVals) + + require.NoError(t, cw.Close()) + require.NoError(t, rgw.Close()) + require.NoError(t, writer.Close()) + + rdr, err := file.NewParquetReader(bytes.NewReader(sink.Bytes())) + require.NoError(t, err) + + require.Equal(t, 1, rdr.NumRowGroups()) + require.EqualValues(t, size, rdr.NumRows()) + + rgr := rdr.RowGroup(0) + cr, err := rgr.Column(0) + require.NoError(t, err) + + f16ColumnReader, ok := cr.(*file.FixedLenByteArrayColumnChunkReader) + require.True(t, ok) + + output := make([]parquet.FixedLenByteArray, size) + + total, valuesRead, err := f16ColumnReader.ReadBatch(int64(chunk), output[:chunk], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, total) + require.EqualValues(t, chunk, valuesRead) + + total, valuesRead, err = f16ColumnReader.ReadBatch(int64(chunk), output[chunk:], nil, nil) + require.NoError(t, err) + require.EqualValues(t, chunk, total) + require.EqualValues(t, chunk, valuesRead) + + require.Equal(t, input, output) + + require.NoError(t, rdr.Close()) +} diff --git a/go/parquet/internal/encoding/byte_stream_split.go b/go/parquet/internal/encoding/byte_stream_split.go new file mode 100644 index 
0000000000000..3772aa876173f --- /dev/null +++ b/go/parquet/internal/encoding/byte_stream_split.go @@ -0,0 +1,389 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "fmt" + "math" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v17/parquet/internal/debug" + "golang.org/x/xerrors" +) + +// encodeByteStreamSplit encodes the raw bytes provided by 'in' into the output buffer 'data' using BYTE_STREAM_SPLIT encoding. +// 'data' must have space for at least len(in) bytes. +func encodeByteStreamSplit(data []byte, in []byte, width int) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + numElements := len(in) / width + for stream := 0; stream < width; stream++ { + for element := 0; element < numElements; element++ { + encLoc := numElements*stream + element + decLoc := width*element + stream + data[encLoc] = in[decLoc] + } + } +} + +// encodeByteStreamSplitWidth2 implements encodeByteStreamSplit optimized for types stored using 2 bytes. +// 'data' must have space for at least len(in) bytes. +func encodeByteStreamSplitWidth2(data []byte, in []byte) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + const width = 2 + numElements := len(in) / width + for element := 0; element < numElements; element++ { + decLoc := width * element + data[element] = in[decLoc] + data[numElements+element] = in[decLoc+1] + } +} + +// encodeByteStreamSplitWidth4 implements encodeByteStreamSplit optimized for types stored using 4 bytes. +// 'data' must have space for at least len(in) bytes. +func encodeByteStreamSplitWidth4(data []byte, in []byte) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + const width = 4 + numElements := len(in) / width + for element := 0; element < numElements; element++ { + decLoc := width * element + data[element] = in[decLoc] + data[numElements+element] = in[decLoc+1] + data[numElements*2+element] = in[decLoc+2] + data[numElements*3+element] = in[decLoc+3] + } +} + +// encodeByteStreamSplitWidth8 implements encodeByteStreamSplit optimized for types stored using 8 bytes. +// 'data' must have space for at least len(in) bytes. 
+func encodeByteStreamSplitWidth8(data []byte, in []byte) { + debug.Assert(len(data) >= len(in), fmt.Sprintf("not enough space in destination buffer for encoding, dest: %d bytes, src: %d bytes", len(data), len(in))) + const width = 8 + numElements := len(in) / width + for element := 0; element < numElements; element++ { + decLoc := width * element + data[element] = in[decLoc] + data[numElements+element] = in[decLoc+1] + data[numElements*2+element] = in[decLoc+2] + data[numElements*3+element] = in[decLoc+3] + data[numElements*4+element] = in[decLoc+4] + data[numElements*5+element] = in[decLoc+5] + data[numElements*6+element] = in[decLoc+6] + data[numElements*7+element] = in[decLoc+7] + } +} + +// decodeByteStreamSplitBatchWidth4 decodes the batch of nValues raw bytes representing a 4-byte datatype provided by 'data', +// into the output buffer 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least len(data) bytes. +func decodeByteStreamSplitBatchWidth4(data []byte, nValues, stride int, out []byte) { + debug.Assert(len(out) >= len(data), fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data))) + const width = 4 + for element := 0; element < nValues; element++ { + out[width*element] = data[element] + out[width*element+1] = data[stride+element] + out[width*element+2] = data[2*stride+element] + out[width*element+3] = data[3*stride+element] + } +} + +// decodeByteStreamSplitBatchWidth8 decodes the batch of nValues raw bytes representing a 8-byte datatype provided by 'data', +// into the output buffer 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least len(data) bytes. +func decodeByteStreamSplitBatchWidth8(data []byte, nValues, stride int, out []byte) { + debug.Assert(len(out) >= len(data), fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data))) + const width = 8 + for element := 0; element < nValues; element++ { + out[width*element] = data[element] + out[width*element+1] = data[stride+element] + out[width*element+2] = data[2*stride+element] + out[width*element+3] = data[3*stride+element] + out[width*element+4] = data[4*stride+element] + out[width*element+5] = data[5*stride+element] + out[width*element+6] = data[6*stride+element] + out[width*element+7] = data[7*stride+element] + } +} + +// decodeByteStreamSplitBatchFLBA decodes the batch of nValues FixedLenByteArrays provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. +func decodeByteStreamSplitBatchFLBA(data []byte, nValues, stride, width int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for stream := 0; stream < width; stream++ { + for element := 0; element < nValues; element++ { + encLoc := stride*stream + element + out[element][stream] = data[encLoc] + } + } +} + +// decodeByteStreamSplitBatchFLBAWidth2 decodes the batch of nValues FixedLenByteArrays of length 2 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. 
+func decodeByteStreamSplitBatchFLBAWidth2(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + } +} + +// decodeByteStreamSplitBatchFLBAWidth4 decodes the batch of nValues FixedLenByteArrays of length 4 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. +func decodeByteStreamSplitBatchFLBAWidth4(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + out[element][2] = data[stride*2+element] + out[element][3] = data[stride*3+element] + } +} + +// decodeByteStreamSplitBatchFLBAWidth8 decodes the batch of nValues FixedLenByteArrays of length 8 provided by 'data', +// into the output slice 'out' using BYTE_STREAM_SPLIT encoding. +// 'out' must have space for at least nValues slices. +func decodeByteStreamSplitBatchFLBAWidth8(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) { + debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues)) + for element := 0; element < nValues; element++ { + out[element][0] = data[element] + out[element][1] = data[stride+element] + out[element][2] = data[stride*2+element] + out[element][3] = data[stride*3+element] + out[element][4] = data[stride*4+element] + out[element][5] = data[stride*5+element] + out[element][6] = data[stride*6+element] + out[element][7] = data[stride*7+element] + } +} + +func releaseBufferToPool(pooled *PooledBufferWriter) { + buf := pooled.buf + memory.Set(buf.Buf(), 0) + buf.ResizeNoShrink(0) + bufferPool.Put(buf) +} + +func validateByteStreamSplitPageData(typeLen, nvals int, data []byte) (int, error) { + if nvals*typeLen < len(data) { + return 0, fmt.Errorf("data size (%d) is too small for the number of values in in BYTE_STREAM_SPLIT (%d)", len(data), nvals) + } + + if len(data)%typeLen != 0 { + return 0, fmt.Errorf("ByteStreamSplit data size %d not aligned with byte_width: %d", len(data), typeLen) + } + + return len(data) / typeLen, nil +} + +// ByteStreamSplitFloat32Encoder writes the underlying bytes of the Float32 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitFloat32Encoder struct { + PlainFloat32Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitFloat32Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainFloat32Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitFloat32Encoder) Release() { + enc.PlainFloat32Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitFloat64Encoder writes the underlying bytes of the Float64 +// into interlaced streams as defined by the 
BYTE_STREAM_SPLIT encoding +type ByteStreamSplitFloat64Encoder struct { + PlainFloat64Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitFloat64Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainFloat64Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitFloat64Encoder) Release() { + enc.PlainFloat64Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitInt32Encoder writes the underlying bytes of the Int32 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitInt32Encoder struct { + PlainInt32Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitInt32Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainInt32Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitInt32Encoder) Release() { + enc.PlainInt32Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitInt64Encoder writes the underlying bytes of the Int64 +// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding +type ByteStreamSplitInt64Encoder struct { + PlainInt64Encoder + flushBuffer *PooledBufferWriter +} + +func (enc *ByteStreamSplitInt64Encoder) FlushValues() (Buffer, error) { + in, err := enc.PlainInt64Encoder.FlushValues() + if err != nil { + return nil, err + } + + if enc.flushBuffer == nil { + enc.flushBuffer = NewPooledBufferWriter(in.Len()) + } + + enc.flushBuffer.buf.Resize(in.Len()) + encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes()) + return enc.flushBuffer.Finish(), nil +} + +func (enc *ByteStreamSplitInt64Encoder) Release() { + enc.PlainInt64Encoder.Release() + releaseBufferToPool(enc.flushBuffer) + enc.flushBuffer = nil +} + +// ByteStreamSplitFloat32Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Float32 values +type ByteStreamSplitFloat32Decoder = ByteStreamSplitDecoder[float32] + +// ByteStreamSplitFloat64Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Float64 values +type ByteStreamSplitFloat64Decoder = ByteStreamSplitDecoder[float64] + +// ByteStreamSplitInt32Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Int32 values +type ByteStreamSplitInt32Decoder = ByteStreamSplitDecoder[int32] + +// ByteStreamSplitInt64Decoder is a decoder for BYTE_STREAM_SPLIT-encoded +// bytes representing Int64 values +type ByteStreamSplitInt64Decoder = ByteStreamSplitDecoder[int64] + +type ByteStreamSplitDecoder[T float32 | float64 | int32 | int64] struct { + decoder + stride int +} + +func (dec *ByteStreamSplitDecoder[T]) Type() parquet.Type { + switch v := any(dec).(type) { + case *ByteStreamSplitDecoder[float32]: + return parquet.Types.Float + case *ByteStreamSplitDecoder[float64]: + return parquet.Types.Double + case *ByteStreamSplitDecoder[int32]: + return parquet.Types.Int32 + case *ByteStreamSplitDecoder[int64]: + return parquet.Types.Int64 + default: + 
panic(fmt.Sprintf("ByteStreamSplitDecoder is not supported for type: %T", v)) + } +} + +func (dec *ByteStreamSplitDecoder[T]) SetData(nvals int, data []byte) error { + nvals, err := validateByteStreamSplitPageData(dec.Type().ByteSize(), nvals, data) + if err != nil { + return err + } + + dec.stride = nvals + return dec.decoder.SetData(nvals, data) +} + +func (dec *ByteStreamSplitDecoder[T]) Decode(out []T) (int, error) { + typeLen := dec.Type().ByteSize() + toRead := len(out) + numBytesNeeded := toRead * typeLen + if numBytesNeeded > len(dec.data) || numBytesNeeded > math.MaxInt32 { + return 0, xerrors.New("parquet: eof exception") + } + + outBytes := arrow.GetBytes(out) + switch typeLen { + case 4: + decodeByteStreamSplitBatchWidth4(dec.data, toRead, dec.stride, outBytes) + case 8: + decodeByteStreamSplitBatchWidth8(dec.data, toRead, dec.stride, outBytes) + default: + return 0, fmt.Errorf("encoding ByteStreamSplit is only defined for numeric type of width 4 or 8, found: %d", typeLen) + } + + dec.nvals -= toRead + dec.data = dec.data[toRead:] + + return toRead, nil +} + +func (dec *ByteStreamSplitDecoder[T]) DecodeSpaced(out []T, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { + toRead := len(out) - nullCount + valuesRead, err := dec.Decode(out[:toRead]) + if err != nil { + return valuesRead, err + } + if valuesRead != toRead { + return valuesRead, xerrors.New("parquet: number of values / definitions levels read did not match") + } + + return spacedExpand(out, nullCount, validBits, validBitsOffset), nil +} diff --git a/go/parquet/internal/encoding/encoding_benchmarks_test.go b/go/parquet/internal/encoding/encoding_benchmarks_test.go index 0252aa4801430..6bf0aec0d2035 100644 --- a/go/parquet/internal/encoding/encoding_benchmarks_test.go +++ b/go/parquet/internal/encoding/encoding_benchmarks_test.go @@ -464,3 +464,173 @@ func BenchmarkDecodeDictByteArray(b *testing.B) { dictDec.Decode(out) } } + +func BenchmarkByteStreamSplitEncodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt32(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int32, sz) + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.Int32Decoder).Decode(output) + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt32Batched(b *testing.B) { + const batchSize = 512 + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b 
*testing.B) { + output := make([]int32, sz) + values := make([]int32, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int32Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int32SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + for batch := 0; batch*batchSize < sz; batch++ { + offset := batch * batchSize + decoder.(encoding.Int32Decoder).Decode(output[offset : offset+batchSize]) + } + } + }) + } +} + +func BenchmarkByteStreamSplitEncodingInt64(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]int64, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int64Encoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int64SizeBytes)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingInt64(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]int64, sz) + values := make([]int64, sz) + for idx := range values { + values[idx] = 64 + } + encoder := encoding.NewEncoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, + false, nil, memory.DefaultAllocator).(encoding.Int64Encoder) + encoder.Put(values) + buf, _ := encoder.FlushValues() + defer buf.Release() + + decoder := encoding.NewDecoder(parquet.Types.Int64, parquet.Encodings.ByteStreamSplit, nil, memory.DefaultAllocator) + b.ResetTimer() + b.SetBytes(int64(len(values) * arrow.Int64SizeBytes)) + for n := 0; n < b.N; n++ { + decoder.SetData(sz, buf.Bytes()) + decoder.(encoding.Int64Decoder).Decode(output) + } + }) + } +} + +func BenchmarkByteStreamSplitEncodingFixedLenByteArray(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + values := make([]parquet.FixedLenByteArray, sz) + for idx := range values { + values[idx] = []byte{0x12, 0x34, 0x56, 0x78} + } + + arraySize := len(values[0]) + col := schema.NewColumn(schema.NewFixedLenByteArrayNode("fixedlenbytearray", parquet.Repetitions.Required, int32(arraySize), -1), 0, 0) + encoder := encoding.NewEncoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit, + false, col, memory.DefaultAllocator).(encoding.FixedLenByteArrayEncoder) + b.ResetTimer() + b.SetBytes(int64(len(values) * arraySize)) + for n := 0; n < b.N; n++ { + encoder.Put(values) + buf, _ := encoder.FlushValues() + buf.Release() + } + }) + } +} + +func BenchmarkByteStreamSplitDecodingFixedLenByteArray(b *testing.B) { + for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 { + b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) { + output := make([]parquet.FixedLenByteArray, sz) + values := make([]parquet.FixedLenByteArray, sz) + for idx := range values { + values[idx] = []byte{0x12, 0x34, 0x56, 0x78} + } + + arraySize := len(values[0]) + col := schema.NewColumn(schema.NewFixedLenByteArrayNode("fixedlenbytearray", parquet.Repetitions.Required, int32(arraySize), 
-1), 0, 0)
+			encoder := encoding.NewEncoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit,
+				false, col, memory.DefaultAllocator).(encoding.FixedLenByteArrayEncoder)
+			encoder.Put(values)
+			buf, _ := encoder.FlushValues()
+			defer buf.Release()
+
+			decoder := encoding.NewDecoder(parquet.Types.FixedLenByteArray, parquet.Encodings.ByteStreamSplit, col, memory.DefaultAllocator)
+			b.ResetTimer()
+			b.SetBytes(int64(len(values) * arraySize))
+			for n := 0; n < b.N; n++ {
+				decoder.SetData(sz, buf.Bytes())
+				decoder.(encoding.FixedLenByteArrayDecoder).Decode(output)
+			}
+		})
+	}
+}
diff --git a/go/parquet/internal/encoding/encoding_test.go b/go/parquet/internal/encoding/encoding_test.go
index f2d1e31236adf..5e95ab16e2676 100644
--- a/go/parquet/internal/encoding/encoding_test.go
+++ b/go/parquet/internal/encoding/encoding_test.go
@@ -406,6 +406,17 @@ func (b *BaseEncodingTestSuite) TestDeltaByteArrayRoundTrip() {
 	}
 }
 
+func (b *BaseEncodingTestSuite) TestByteStreamSplitRoundTrip() {
+	b.initData(10000, 1)
+
+	switch b.typ {
+	case reflect.TypeOf(float32(0)), reflect.TypeOf(float64(0)), reflect.TypeOf(int32(0)), reflect.TypeOf(int64(0)), reflect.TypeOf(parquet.FixedLenByteArray{}):
+		b.checkRoundTrip(parquet.Encodings.ByteStreamSplit)
+	default:
+		b.Panics(func() { b.checkRoundTrip(parquet.Encodings.ByteStreamSplit) })
+	}
+}
+
 func (b *BaseEncodingTestSuite) TestSpacedRoundTrip() {
 	exec := func(vals, repeats int, validBitsOffset int64, nullProb float64) {
 		b.Run(fmt.Sprintf("%d vals %d repeats %d offset %0.3f null", vals, repeats, validBitsOffset, 1-nullProb), func() {
diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
index ceb9f5a2e4a22..e264697a8c547 100644
--- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
+++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
@@ -17,6 +17,7 @@
 package encoding
 
 import (
+	"fmt"
 	"math"
 
 	"github.com/apache/arrow/go/v17/internal/utils"
@@ -64,3 +65,73 @@ func (pflba *PlainFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenB
 
 	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
 }
+
+// ByteStreamSplitFixedLenByteArrayDecoder is a decoder for BYTE_STREAM_SPLIT-encoded
+// bytes representing FixedLenByteArray values
+type ByteStreamSplitFixedLenByteArrayDecoder struct {
+	decoder
+	stride int
+}
+
+func (dec *ByteStreamSplitFixedLenByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.FixedLenByteArray
+}
+
+func (dec *ByteStreamSplitFixedLenByteArrayDecoder) SetData(nvals int, data []byte) error {
+	if nvals*dec.typeLen < len(data) {
+		return fmt.Errorf("data size (%d) is too small for the number of values in BYTE_STREAM_SPLIT (%d)", len(data), nvals)
+	}
+
+	if len(data)%dec.typeLen != 0 {
+		return fmt.Errorf("ByteStreamSplit data size %d not aligned with type %s and byte_width: %d", len(data), dec.Type(), dec.typeLen)
+	}
+
+	nvals = len(data) / dec.typeLen
+	dec.stride = nvals
+
+	return dec.decoder.SetData(nvals, data)
+}
+
+func (dec *ByteStreamSplitFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) {
+	toRead := len(out)
+	numBytesNeeded := toRead * dec.typeLen
+	if numBytesNeeded > len(dec.data) || numBytesNeeded > math.MaxInt32 {
+		return 0, xerrors.New("parquet: eof exception")
+	}
+
+	for i := range out {
+		if cap(out[i]) < dec.typeLen {
+			out[i] = make(parquet.FixedLenByteArray, dec.typeLen)
+		} else {
+			out[i] = out[i][:dec.typeLen]
+		}
+	}
+
+	switch dec.typeLen {
+	case 2:
+		decodeByteStreamSplitBatchFLBAWidth2(dec.data, toRead, dec.stride, out)
+	case 4:
+		decodeByteStreamSplitBatchFLBAWidth4(dec.data, toRead, dec.stride, out)
+	case 8:
+		decodeByteStreamSplitBatchFLBAWidth8(dec.data, toRead, dec.stride, out)
+	default:
+		decodeByteStreamSplitBatchFLBA(dec.data, toRead, dec.stride, dec.typeLen, out)
+	}
+
+	dec.nvals -= toRead
+	dec.data = dec.data[toRead:]
+	return toRead, nil
+}
+
+func (dec *ByteStreamSplitFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toRead := len(out) - nullCount
+	valuesRead, err := dec.Decode(out[:toRead])
+	if err != nil {
+		return valuesRead, err
+	}
+	if valuesRead != toRead {
+		return valuesRead, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
index 1cdb3c84d9212..a93164e305fdf 100644
--- a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
+++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
@@ -75,6 +75,45 @@ func (PlainFixedLenByteArrayEncoder) Type() parquet.Type {
 	return parquet.Types.FixedLenByteArray
 }
 
+// ByteStreamSplitFixedLenByteArrayEncoder writes the underlying bytes of the FixedLenByteArray
+// into interlaced streams as defined by the BYTE_STREAM_SPLIT encoding
+type ByteStreamSplitFixedLenByteArrayEncoder struct {
+	PlainFixedLenByteArrayEncoder
+	flushBuffer *PooledBufferWriter
+}
+
+func (enc *ByteStreamSplitFixedLenByteArrayEncoder) FlushValues() (Buffer, error) {
+	in, err := enc.PlainFixedLenByteArrayEncoder.FlushValues()
+	if err != nil {
+		return nil, err
+	}
+
+	if enc.flushBuffer == nil {
+		enc.flushBuffer = NewPooledBufferWriter(in.Len())
+	}
+
+	enc.flushBuffer.buf.ResizeNoShrink(in.Len())
+
+	switch enc.typeLen {
+	case 2:
+		encodeByteStreamSplitWidth2(enc.flushBuffer.Bytes(), in.Bytes())
+	case 4:
+		encodeByteStreamSplitWidth4(enc.flushBuffer.Bytes(), in.Bytes())
+	case 8:
+		encodeByteStreamSplitWidth8(enc.flushBuffer.Bytes(), in.Bytes())
+	default:
+		encodeByteStreamSplit(enc.flushBuffer.Bytes(), in.Bytes(), enc.typeLen)
+	}
+
+	return enc.flushBuffer.Finish(), nil
+}
+
+func (enc *ByteStreamSplitFixedLenByteArrayEncoder) Release() {
+	enc.PlainFixedLenByteArrayEncoder.Release()
+	releaseBufferToPool(enc.flushBuffer)
+	enc.flushBuffer = nil
+}
+
 // WriteDict overrides the embedded WriteDict function to call a specialized function
 // for copying out the Fixed length values from the dictionary more efficiently.
func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) { diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 0c473a989ef71..663c1164c565e 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -88,6 +88,8 @@ func (int32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema case format.Encoding_DELTA_BINARY_PACKED: return DeltaBitPackInt32Encoder{&deltaBitPackEncoder{ encoder: newEncoderBase(e, descr, mem)}} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitInt32Encoder{PlainInt32Encoder: PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -120,6 +122,8 @@ func (int32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD decoder: newDecoderBase(format.Encoding(e), descr), mem: mem, }} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitInt32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -325,6 +329,8 @@ func (int64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema case format.Encoding_DELTA_BINARY_PACKED: return DeltaBitPackInt64Encoder{&deltaBitPackEncoder{ encoder: newEncoderBase(e, descr, mem)}} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitInt64Encoder{PlainInt64Encoder: PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -357,6 +363,8 @@ func (int64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD decoder: newDecoderBase(format.Encoding(e), descr), mem: mem, }} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitInt64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -774,6 +782,8 @@ func (float32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sche switch e { case format.Encoding_PLAIN: return &PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFloat32Encoder{PlainFloat32Encoder: PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -797,6 +807,8 @@ func (float32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, us switch e { case parquet.Encodings.Plain: return &PlainFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } @@ -999,6 +1011,8 @@ func (float64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sche switch e { case format.Encoding_PLAIN: return &PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFloat64Encoder{PlainFloat64Encoder: PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -1022,6 +1036,8 @@ func (float64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, us switch e { case parquet.Encodings.Plain: return &PlainFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding 
type") } @@ -1492,6 +1508,8 @@ func (fixedLenByteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, d switch e { case format.Encoding_PLAIN: return &PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplitFixedLenByteArrayEncoder{PlainFixedLenByteArrayEncoder: PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)}} default: panic("unimplemented encoding type") } @@ -1515,6 +1533,8 @@ func (fixedLenByteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema. switch e { case parquet.Encodings.Plain: return &PlainFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplitFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)} default: panic("unimplemented encoding type") } diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index 57d7e641fb5df..ebd7733135a52 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -93,6 +93,10 @@ func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *s return &DeltaByteArrayEncoder{ encoder: newEncoderBase(e, descr, mem), } +{{- end}} +{{- if or (eq .Name "FixedLenByteArray") (eq .Name "Float32") (eq .Name "Float64") (eq .Name "Int32") (eq .Name "Int64")}} + case format.Encoding_BYTE_STREAM_SPLIT: + return &ByteStreamSplit{{.Name}}Encoder{Plain{{.Name}}Encoder: Plain{{.Name}}Encoder{encoder: newEncoderBase(e,descr,mem)}} {{- end}} default: panic("unimplemented encoding type") @@ -154,6 +158,10 @@ func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, decoder: newDecoderBase(format.Encoding(e), descr), mem: mem, }} +{{- end}} +{{- if or (eq .Name "FixedLenByteArray") (eq .Name "Float32") (eq .Name "Float64") (eq .Name "Int32") (eq .Name "Int64")}} + case parquet.Encodings.ByteStreamSplit: + return &ByteStreamSplit{{.Name}}Decoder{decoder: newDecoderBase(format.Encoding(e), descr)} {{- end}} default: panic("unimplemented encoding type") diff --git a/go/parquet/types.go b/go/parquet/types.go index c1ab3788ca577..71336a7987cd8 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -296,6 +296,7 @@ var ( DeltaByteArray Encoding DeltaBinaryPacked Encoding DeltaLengthByteArray Encoding + ByteStreamSplit Encoding }{ Plain: Encoding(format.Encoding_PLAIN), PlainDict: Encoding(format.Encoding_PLAIN_DICTIONARY), @@ -305,6 +306,7 @@ var ( DeltaByteArray: Encoding(format.Encoding_DELTA_BYTE_ARRAY), DeltaBinaryPacked: Encoding(format.Encoding_DELTA_BINARY_PACKED), DeltaLengthByteArray: Encoding(format.Encoding_DELTA_LENGTH_BYTE_ARRAY), + ByteStreamSplit: Encoding(format.Encoding_BYTE_STREAM_SPLIT), } // ColumnOrders contains constants for the Column Ordering fields From 031497ddab5ce497c49b87d7125bf5c01bf80f48 Mon Sep 17 00:00:00 2001 From: mwish Date: Wed, 10 Jul 2024 10:29:09 +0800 Subject: [PATCH 044/122] GH-43143: [C++][Parquet] Default initialize some parquet metadata variables (#43144) ### Rationale for this change Default initialize some parquet metadata variables ### What changes are included in this PR? Default initialize some parquet metadata variables ### Are these changes tested? Covered by existing ### Are there any user-facing changes? 
no

* GitHub Issue: #43143

Authored-by: mwish
Signed-off-by: Sutou Kouhei
---
 cpp/src/parquet/metadata.cc |  6 +++---
 cpp/src/parquet/schema.cc   |  6 +++---
 cpp/src/parquet/schema.h    | 12 +++++++-----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index b24883cdc160b..d7be50a6116bd 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -1360,9 +1360,9 @@ class ApplicationVersionParser {
   // For parsing.
   std::string spaces_;
   std::string digits_;
-  size_t version_parsing_position_;
-  size_t version_start_;
-  size_t version_end_;
+  size_t version_parsing_position_{0};
+  size_t version_start_{0};
+  size_t version_end_{0};
   std::string version_string_;
 };
 }  // namespace
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index 4ddeef9e83975..47fa72d829658 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -600,7 +600,7 @@ class SchemaVisitor : public Node::ConstVisitor {
   void Visit(const Node* node) override {
     format::SchemaElement element;
     node->ToParquet(&element);
-    elements_->push_back(element);
+    elements_->push_back(std::move(element));
 
     if (node->is_group()) {
       const GroupNode* group_node = static_cast<const GroupNode*>(node);
@@ -671,7 +671,7 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
 
 static void PrintConvertedType(const PrimitiveNode* node, std::ostream& stream) {
   auto lt = node->converted_type();
-  auto la = node->logical_type();
+  const auto& la = node->logical_type();
   if (la && la->is_valid() && !la->is_none()) {
     stream << " (" << la->ToString() << ")";
   } else if (lt == ConvertedType::DECIMAL) {
@@ -718,7 +718,7 @@ struct SchemaPrinter : public Node::ConstVisitor {
     stream_ << " group "
             << "field_id=" << node->field_id() << " " << node->name();
    auto lt = node->converted_type();
-    auto la = node->logical_type();
+    const auto& la = node->logical_type();
    if (la && la->is_valid() && !la->is_none()) {
      stream_ << " (" << la->ToString() << ")";
    } else if (lt != ConvertedType::NONE) {
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index 896ec1e47968d..1addc73bd367d 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -175,7 +175,7 @@ class PARQUET_EXPORT Node {
   Node::type type_;
   std::string name_;
   Repetition::type repetition_;
-  ConvertedType::type converted_type_;
+  ConvertedType::type converted_type_{ConvertedType::NONE};
   std::shared_ptr<const LogicalType> logical_type_;
   int field_id_;
   // Nodes should not be shared, they have a single parent.
@@ -280,7 +280,8 @@ class PARQUET_EXPORT GroupNode : public Node {
                             const NodeVector& fields,
                             std::shared_ptr<const LogicalType> logical_type,
                             int field_id = -1) {
-    return NodePtr(new GroupNode(name, repetition, fields, logical_type, field_id));
+    return NodePtr(
+        new GroupNode(name, repetition, fields, std::move(logical_type), field_id));
   }
 
   bool Equals(const Node* other) const override;
@@ -376,7 +377,7 @@ class PARQUET_EXPORT ColumnDescriptor {
   ColumnOrder column_order() const { return primitive_node_->column_order(); }
 
   SortOrder::type sort_order() const {
-    auto la = logical_type();
+    const auto& la = logical_type();
    auto pt = physical_type();
    return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt);
  }
@@ -416,8 +417,8 @@ class PARQUET_EXPORT ColumnDescriptor {
 // TODO(wesm): this object can be recomputed from a Schema
 class PARQUET_EXPORT SchemaDescriptor {
  public:
-  SchemaDescriptor() {}
-  ~SchemaDescriptor() {}
+  SchemaDescriptor() = default;
+  ~SchemaDescriptor() = default;
 
   // Analyze the schema
   void Init(std::unique_ptr<schema::Node> schema);
@@ -464,6 +465,7 @@ class PARQUET_EXPORT SchemaDescriptor {
   // Root Node
   schema::NodePtr schema_;
   // Root Node
+  // Would never be NULLPTR.
   const schema::GroupNode* group_node_;
 
   void BuildTree(const schema::NodePtr& node, int16_t max_def_level,

From 3b7ad9d4e94f3eb1abd1832a9b162c8e5a9b4b4e Mon Sep 17 00:00:00 2001
From: Rossi Sun
Date: Wed, 10 Jul 2024 18:55:12 +0800
Subject: [PATCH 045/122] GH-43129: [C++][Compute] Fix the unnecessary
 allocation of extra bytes when encoding row table (#43125)

### Rationale for this change

As described in #43129, the current row table occupies more memory than expected; the memory consumption is double what is necessary. The reason is laid out below.

When encoding var length columns into the row table:

https://github.com/apache/arrow/blob/e59832fb05dc40a85fa63297c77c8f134c9ac8e0/cpp/src/arrow/compute/row/encode_internal.cc#L155-L162

We first call `AppendEmpty` to reserve space for `x` rows but `0` bytes. This reserves enough size for the underlying fixed-length buffers: null masks and offsets (for var-length columns). Then we call `GetRowOffsetsSelected` to populate the offsets. Finally we call `AppendEmpty` again with `0` rows but `y` bytes, where `y` is the last offset element, which is essentially the whole size of the var-length columns. All of this sounds reasonable so far.

However, `AppendEmpty` calls `ResizeOptionalVaryingLengthBuffer`, in which:

https://github.com/apache/arrow/blob/e59832fb05dc40a85fa63297c77c8f134c9ac8e0/cpp/src/arrow/compute/row/row_internal.cc#L294-L303

We calculate `bytes_capacity_new` by repeatedly doubling it until it is big enough for `num_bytes + num_extra_bytes`. Note that by this point, `num_bytes == offsets()[num_rows_]` is already `y`, and meanwhile `num_extra_bytes` is also `y`, hence the unexpected doubling beyond what is necessary.

### What changes are included in this PR?

Fix the wasted half size for buffers in the row table. Also add tests to make sure the buffer size is as expected.

### Are these changes tested?

UT included.

### Are there any user-facing changes?

None.
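To make the doubling arithmetic concrete, here is a small self-contained Go sketch of the growth loop described above (the `bytesCapacityNew` name mirrors the C++ variable; the helper itself is an illustration of the arithmetic, not the actual implementation):

```go
package main

import "fmt"

// bytesCapacityNew models the doubling loop: grow the capacity until it fits
// numBytes + numExtraBytes. Illustrative only.
func bytesCapacityNew(capacity, numBytes, numExtraBytes int64) int64 {
	for capacity < numBytes+numExtraBytes {
		capacity *= 2
	}
	return capacity
}

func main() {
	const y = int64(1 << 20) // total var-length size, already reflected in the offsets

	// Pre-fix: num_bytes has already reached y (offsets populated) and the second
	// AppendEmpty asks for y extra bytes again, so capacity grows to 2*y.
	fmt.Println(bytesCapacityNew(1024, y, y)) // 2097152

	// Post-fix: zero extra bytes are requested, so ~y suffices.
	fmt.Println(bytesCapacityNew(1024, y, 0)) // 1048576
}
```

With the fix, the second `AppendEmpty` requests zero extra bytes, so the loop stops as soon as the populated offsets are covered.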
* GitHub Issue: #43129

Authored-by: Ruoxi Sun
Signed-off-by: Antoine Pitrou
---
 cpp/src/arrow/compute/CMakeLists.txt         |   1 +
 cpp/src/arrow/compute/row/encode_internal.cc |  17 ++-
 cpp/src/arrow/compute/row/row_internal.cc    |   8 +-
 cpp/src/arrow/compute/row/row_internal.h     |  18 ++-
 cpp/src/arrow/compute/row/row_test.cc        | 129 +++++++++++++++++++
 5 files changed, 161 insertions(+), 12 deletions(-)
 create mode 100644 cpp/src/arrow/compute/row/row_test.cc

diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt
index 0a8018cd580cf..e20b45897db95 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -92,6 +92,7 @@ add_arrow_test(internals_test
                key_hash_test.cc
                row/compare_test.cc
                row/grouper_test.cc
+               row/row_test.cc
                util_internal_test.cc)
 
 add_arrow_compute_test(expression_test SOURCES expression_test.cc)
diff --git a/cpp/src/arrow/compute/row/encode_internal.cc b/cpp/src/arrow/compute/row/encode_internal.cc
index 01d552ef8270f..88ab5b81b1e0a 100644
--- a/cpp/src/arrow/compute/row/encode_internal.cc
+++ b/cpp/src/arrow/compute/row/encode_internal.cc
@@ -152,14 +152,21 @@ void RowTableEncoder::PrepareEncodeSelected(int64_t start_row, int64_t num_rows,
 Status RowTableEncoder::EncodeSelected(RowTableImpl* rows, uint32_t num_selected,
                                        const uint16_t* selection) {
   rows->Clean();
-  RETURN_NOT_OK(
-      rows->AppendEmpty(static_cast<uint32_t>(num_selected), static_cast<uint32_t>(0)));
+  // First AppendEmpty with num_selected rows and zero extra bytes to resize the
+  // fixed-length buffers (including buffer for offsets).
+  RETURN_NOT_OK(
+      rows->AppendEmpty(static_cast<uint32_t>(num_selected),
+                        /*num_extra_bytes_to_append=*/static_cast<uint32_t>(0)));
+  // Then populate the offsets of the var-length columns, which will be used as the target
+  // size of the var-length buffers resizing below.
   EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, num_selected,
                                         selection);
-
-  RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(0),
-                                  static_cast<uint32_t>(rows->offsets()[num_selected])));
+  // Last AppendEmpty with zero rows and zero extra bytes to resize the var-length buffers
+  // based on the populated offsets.
+  RETURN_NOT_OK(
+      rows->AppendEmpty(/*num_rows_to_append=*/static_cast<uint32_t>(0),
+                        /*num_extra_bytes_to_append=*/static_cast<uint32_t>(0)));
 
   for (size_t icol = 0; icol < batch_all_cols_.size(); ++icol) {
     if (batch_all_cols_[icol].metadata().is_fixed_length) {
diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc
index 469205e9b008d..9ac3a4c43e541 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -246,13 +246,13 @@ int64_t RowTableImpl::size_rows_varying_length(int64_t num_bytes) const {
 }
 
 void RowTableImpl::UpdateBufferPointers() {
-  buffers_[0] = null_masks_->mutable_data();
+  buffers_[0] = null_masks_.get();
   if (metadata_.is_fixed_length) {
-    buffers_[1] = rows_->mutable_data();
+    buffers_[1] = rows_.get();
     buffers_[2] = nullptr;
   } else {
-    buffers_[1] = offsets_->mutable_data();
-    buffers_[2] = rows_->mutable_data();
+    buffers_[1] = offsets_.get();
+    buffers_[2] = rows_.get();
   }
 }
diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h
index 3220b7ffe6e40..80409f93d2b96 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -189,11 +189,17 @@ class ARROW_EXPORT RowTableImpl {
   // Accessors into the table's buffers
   const uint8_t* data(int i) const {
     ARROW_DCHECK(i >= 0 && i < kMaxBuffers);
-    return buffers_[i];
+    if (ARROW_PREDICT_TRUE(buffers_[i])) {
+      return buffers_[i]->data();
+    }
+    return NULLPTR;
   }
   uint8_t* mutable_data(int i) {
     ARROW_DCHECK(i >= 0 && i < kMaxBuffers);
-    return buffers_[i];
+    if (ARROW_PREDICT_TRUE(buffers_[i])) {
+      return buffers_[i]->mutable_data();
+    }
+    return NULLPTR;
   }
   const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
   uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
@@ -207,6 +213,12 @@ class ARROW_EXPORT RowTableImpl {
   /// successive calls
   bool has_any_nulls(const LightContext* ctx) const;
 
+  /// \brief Size of the table's buffers
+  int64_t buffer_size(int i) const {
+    ARROW_DCHECK(i >= 0 && i < kMaxBuffers);
+    return buffers_[i]->size();
+  }
+
  private:
   Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
   Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
@@ -236,7 +248,7 @@ class ARROW_EXPORT RowTableImpl {
   // Stores the fixed-length parts of the rows
   std::unique_ptr<ResizableBuffer> rows_;
   static constexpr int kMaxBuffers = 3;
-  uint8_t* buffers_[kMaxBuffers];
+  ResizableBuffer* buffers_[kMaxBuffers];
   // The number of rows in the table
   int64_t num_rows_;
   // The number of rows that can be stored in the table without resizing
diff --git a/cpp/src/arrow/compute/row/row_test.cc b/cpp/src/arrow/compute/row/row_test.cc
new file mode 100644
index 0000000000000..2c1a60dfb231c
--- /dev/null
+++ b/cpp/src/arrow/compute/row/row_test.cc
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+
+#include "arrow/compute/row/encode_internal.h"
+#include "arrow/compute/row/row_internal.h"
+#include "arrow/testing/generator.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow {
+namespace compute {
+
+namespace {
+
+Result<RowTableImpl> MakeRowTableFromColumn(const std::shared_ptr<Array>& column,
+                                            int64_t num_rows, int row_alignment,
+                                            int string_alignment) {
+  DCHECK_GE(column->length(), num_rows);
+  MemoryPool* pool = default_memory_pool();
+
+  std::vector<KeyColumnArray> column_arrays;
+  std::vector<Datum> values{column};
+  ExecBatch batch(std::move(values), num_rows);
+  RETURN_NOT_OK(ColumnArraysFromExecBatch(batch, &column_arrays));
+
+  std::vector<KeyColumnMetadata> column_metadatas;
+  RETURN_NOT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas));
+  RowTableMetadata table_metadata;
+  table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment,
+                                          string_alignment);
+
+  RowTableImpl row_table;
+  RETURN_NOT_OK(row_table.Init(pool, table_metadata));
+
+  RowTableEncoder row_encoder;
+  row_encoder.Init(column_metadatas, row_alignment, string_alignment);
+  row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays);
+
+  std::vector<uint16_t> row_ids(num_rows);
+  std::iota(row_ids.begin(), row_ids.end(), 0);
+
+  RETURN_NOT_OK(row_encoder.EncodeSelected(&row_table, static_cast<uint32_t>(num_rows),
+                                           row_ids.data()));
+
+  return row_table;
+}
+
+}  // namespace
+
+// GH-43129: Ensure that the memory consumption of the row table is reasonable, that is,
+// with the growth factor of 2, the actual memory usage does not exceed twice the amount
+// of memory actually needed.
+TEST(RowTableMemoryConsumption, Encode) {
+  constexpr int64_t num_rows_max = 8192;
+  constexpr int64_t padding_for_vectors = 64;
+
+  ASSERT_OK_AND_ASSIGN(
+      auto fixed_length_column,
+      ::arrow::gen::Constant(std::make_shared<UInt32Scalar>(0))->Generate(num_rows_max));
+  ASSERT_OK_AND_ASSIGN(auto var_length_column,
+                       ::arrow::gen::Constant(std::make_shared<StringScalar>("X"))
+                           ->Generate(num_rows_max));
+
+  for (int64_t num_rows : {1023, 1024, 1025, 4095, 4096, 4097}) {
+    // Fixed length column.
+    {
+      SCOPED_TRACE("encoding fixed length column of " + std::to_string(num_rows) +
+                   " rows");
+      ASSERT_OK_AND_ASSIGN(auto row_table,
+                           MakeRowTableFromColumn(fixed_length_column, num_rows,
+                                                  uint32()->byte_width(), 0));
+      ASSERT_NE(row_table.data(0), NULLPTR);
+      ASSERT_NE(row_table.data(1), NULLPTR);
+      ASSERT_EQ(row_table.data(2), NULLPTR);
+
+      int64_t actual_null_mask_size =
+          num_rows * row_table.metadata().null_masks_bytes_per_row;
+      ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors);
+      ASSERT_GT(actual_null_mask_size * 2,
+                row_table.buffer_size(0) - padding_for_vectors);
+
+      int64_t actual_rows_size = num_rows * uint32()->byte_width();
+      ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - padding_for_vectors);
+      ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - padding_for_vectors);
+    }
+
+    // Var length column.
+    {
+      SCOPED_TRACE("encoding var length column of " + std::to_string(num_rows) + " rows");
+      ASSERT_OK_AND_ASSIGN(auto row_table,
+                           MakeRowTableFromColumn(var_length_column, num_rows, 4, 4));
+      ASSERT_NE(row_table.data(0), NULLPTR);
+      ASSERT_NE(row_table.data(1), NULLPTR);
+      ASSERT_NE(row_table.data(2), NULLPTR);
+
+      int64_t actual_null_mask_size =
+          num_rows * row_table.metadata().null_masks_bytes_per_row;
+      ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors);
+      ASSERT_GT(actual_null_mask_size * 2,
+                row_table.buffer_size(0) - padding_for_vectors);
+
+      int64_t actual_offset_size = num_rows * sizeof(uint32_t);
+      ASSERT_LE(actual_offset_size, row_table.buffer_size(1) - padding_for_vectors);
+      ASSERT_GT(actual_offset_size * 2, row_table.buffer_size(1) - padding_for_vectors);
+
+      int64_t actual_rows_size = num_rows * row_table.offsets()[1];
+      ASSERT_LE(actual_rows_size, row_table.buffer_size(2) - padding_for_vectors);
+      ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(2) - padding_for_vectors);
+    }
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow

From 47b2fbd2aeaefbb944be788318b1c2f190fed1cc Mon Sep 17 00:00:00 2001
From: Rossi Sun
Date: Wed, 10 Jul 2024 18:56:41 +0800
Subject: [PATCH 046/122] GH-43046: [C++] Fix avx2 gather rows more than 2^31
 issue in `CompareColumnsToRows` (#43065)

### Rationale for this change

See #43046.

### What changes are included in this PR?

Use the unsigned offset safe gather introduced in #42188, which fixed similar issues.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

None.

* GitHub Issue: #43046

Lead-authored-by: Ruoxi Sun
Co-authored-by: Rossi Sun
Signed-off-by: Antoine Pitrou
---
 .../compute/row/compare_internal_avx2.cc     | 72 +++++++++----------
 cpp/src/arrow/compute/row/row_internal.cc    |  1 +
 2 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc
index ec511aa03a6d0..23238a3691c8a 100644
--- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc
+++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc
@@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2(
   }
 }
 
+namespace {
+
+// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
+// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
+// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
+// it as negative offset and gather the data from undesired address. To avoid this issue,
+// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
+// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
+// intrinsics are safe.
+
+constexpr uint64_t kTwoGB = 0x80000000ull;
+
+template <int kScale>
+inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
+  int const* normalized_base = base + kTwoGB / sizeof(int);
+  __m256i normalized_offset =
+      _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
+  return _mm256_i32gather_epi32(normalized_base, normalized_offset,
+                                static_cast<int>(kScale));
+}
+
+template <int kScale>
+inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
+                                          __m128i offset) {
+  arrow::util::int64_for_gather_t const* normalized_base =
+      base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
+  __m128i normalized_offset =
+      _mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
+  return _mm256_i32gather_epi64(normalized_base, normalized_offset,
+                                static_cast<int>(kScale));
+}
+
+}  // namespace
+
 template <bool use_selection, class COMPARE8_FN>
 uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
     uint32_t offset_within_row, uint32_t num_rows_to_compare,
@@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
       irow_right =
          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(left_to_right_map) + i);
     }
-    // TODO: Need to test if this gather is OK when irow_right is larger than
-    // 0x80000000u.
-    __m256i offset_right =
-        _mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4);
+    __m256i offset_right =
+        UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right);
     offset_right =
        _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row));
 
    reinterpret_cast<uint64_t*>(match_bytevector)[i] =
@@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
   }
 }
 
-namespace {
-
-// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
-// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
-// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
-// it as negative offset and gather the data from undesired address. To avoid this issue,
-// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
-// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
-// intrinsics are safe.
-
-constexpr uint64_t kTwoGB = 0x80000000ull;
-
-template <int kScale>
-inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
-  int const* normalized_base = base + kTwoGB / sizeof(int);
-  __m256i normalized_offset =
-      _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
-  return _mm256_i32gather_epi32(normalized_base, normalized_offset,
-                                static_cast<int>(kScale));
-}
-
-template <int kScale>
-inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
-                                          __m128i offset) {
-  arrow::util::int64_for_gather_t const* normalized_base =
-      base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
-  __m128i normalized_offset =
-      _mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
-  return _mm256_i32gather_epi64(normalized_base, normalized_offset,
-                                static_cast<int>(kScale));
-}
-
-}  // namespace
-
 template <int column_width>
 inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base,
                                       __m256i irow_left, __m256i offset_right,
diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc
index 9ac3a4c43e541..0d418fdecf488 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -325,6 +325,7 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from,
     // Varying-length rows
     auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
     auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
+    // TODO(GH-43202): The following two variables are possibly overflowing.
     uint32_t total_length = to_offsets[num_rows_];
     uint32_t total_length_to_append = 0;
     for (uint32_t i = 0; i < num_rows_to_append; ++i) {

From dffff1c7b0af86d92de45eaeedb46bfa136ce56b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 10 Jul 2024 05:28:40 -0700
Subject: [PATCH 047/122] MINOR: Bump System.Text.Json from 5.0.2 to 8.0.4 in
 /csharp/test/Apache.Arrow.IntegrationTest (#43203)

Bumps System.Text.Json from 5.0.2 to 8.0.4.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=System.Text.Json&package-manager=nuget&previous-version=5.0.2&new-version=8.0.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher
---
 .../Apache.Arrow.IntegrationTest.csproj        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
index 7f226fd08818f..21f06e3008774 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
+++ b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj
@@ -9,7 +9,7 @@
-    <PackageReference Include="System.Text.Json" Version="5.0.2" />
+    <PackageReference Include="System.Text.Json" Version="8.0.4" />

From 5e451d85d7269d3fb9c7eaab06caece5718c40e5 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Wed, 10 Jul 2024 14:29:06 +0200
Subject: [PATCH 048/122] GH-43183: [C++] Add `date{32,64}` to `date{32,64}`
 cast (#43192)

### Rationale for this change

This one seems to be missing; see https://github.com/apache/arrow/issues/43183.

### What changes are included in this PR?

### Are these changes tested?

I'm not sure what the best place is to test this, please advise.

### Are there any user-facing changes?

* GitHub Issue: #43183

Lead-authored-by: Fokko
Co-authored-by: Fokko Driesprong
Co-authored-by: Hyunseok Seo
Signed-off-by: Antoine Pitrou
---
 cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc | 10 ++++++++--
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc     |  4 ++++
 python/pyarrow/tests/test_compute.py                  |  8 ++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index a5612643913aa..a293000d56640 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -510,9 +510,12 @@ void AddCrossUnitCastNoPreallocate(CastFunction* func) {
 
 std::shared_ptr<CastFunction> GetDate32Cast() {
   auto func = std::make_shared<CastFunction>("cast_date32", Type::DATE32);
-  auto out_ty = date32();
+  const auto& out_ty = date32();
   AddCommonCasts(Type::DATE32, out_ty, func.get());
 
+  // date32 -> date32
+  AddZeroCopyCast(Type::DATE32, date32(), date32(), func.get());
+
   // int32 -> date32
   AddZeroCopyCast(Type::INT32, int32(), date32(), func.get());
 
@@ -532,9 +535,12 @@ std::shared_ptr<CastFunction> GetDate32Cast() {
 
 std::shared_ptr<CastFunction> GetDate64Cast() {
   auto func = std::make_shared<CastFunction>("cast_date64", Type::DATE64);
-  auto out_ty = date64();
+  const auto& out_ty = date64();
   AddCommonCasts(Type::DATE64, out_ty, func.get());
 
+  // date64 -> date64
+  AddZeroCopyCast(Type::DATE64, date64(), date64(), func.get());
+
   // int64 -> date64
   AddZeroCopyCast(Type::INT64, int64(), date64(), func.get());
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index f60d8f2e19e98..140789e59665b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1646,6 +1646,10 @@ TEST(Cast, DateToDate) {
                              86400000,
                              864000000])");
 
+  // Zero copy
+  CheckCast(day_32, day_32);
+  CheckCast(day_64, day_64);
+
   // Multiply promotion
   CheckCast(day_32, day_64);
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 13e30ed1da493..64fe7f1deb510 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1842,6 +1842,14 @@ def test_cast():
         assert pc.cast(arr, expected.type) == expected
 
 
+@pytest.mark.parametrize('value_type', [pa.date32(), pa.date64()])
+def test_identity_cast_dates(value_type):
+    dt = datetime.date(1990,
3, 1) + + arr = pa.array([dt], type=value_type) + assert pc.cast(arr, value_type) == arr + + @pytest.mark.parametrize('value_type', numerical_arrow_types) def test_fsl_to_fsl_cast(value_type): # Different field name and different type. From c4013482c9172be894fe9748b8156f31d021dd5f Mon Sep 17 00:00:00 2001 From: Nozomi Isozaki <63992859+nontan-pixiv@users.noreply.github.com> Date: Thu, 11 Jul 2024 03:57:47 +0900 Subject: [PATCH 049/122] GH-41541: [Go][Parquet] More fixes for writer performance regression (#42003) ### Rationale for this change This PR is complementary to #41638 . The prior PR reduces reallocations in `PooledBufferWriter`. However the problematic formula it addressed is still used in other functions. In addition to this, `(*PooledBufferWriter).Reserve()` simply doubles the capacity of buffers regardless of its argument `nbytes`. This may result in excessive allocations in some cases. ### What changes are included in this PR? - Applied the fixed formula to `(*BufferWriter).Reserve()`. - Updated the new capacity passed to `(*memory.Buffer).Reserve()`. - Now using `bitutil.NextPowerOf2(b.pos + nbytes)` to avoid reallocations when adding `nbytes`. - Replaced `math.Max` with `utils.Max` in `(*bufferWriteSeeker).Reserve()` to avoid unnecessary type conversions. ### Are these changes tested? Yes. The following commands pass. ``` $ export PARQUET_TEST_DATA=$PWD/cpp/submodules/parquet-testing/data $ (cd go && go test ./...) ``` ### Are there any user-facing changes? No, but it may reduce the number of allocations and improve the throughput. Before: ``` $ go test -test.run='^$' -test.bench='^BenchmarkWriteColumn$' -benchmem ./parquet/pqarrow/... goos: linux goarch: arm64 pkg: github.com/apache/arrow/go/v17/parquet/pqarrow BenchmarkWriteColumn/int32_not_nullable-10 1190 1016705 ns/op 4125.39 MB/s 5443579 B/op 240 allocs/op BenchmarkWriteColumn/int32_nullable-10 52 24780561 ns/op 169.26 MB/s 12048944 B/op 249 allocs/op BenchmarkWriteColumn/int64_not_nullable-10 632 1717090 ns/op 4885.36 MB/s 5445954 B/op 265 allocs/op BenchmarkWriteColumn/int64_nullable-10 51 22949770 ns/op 365.52 MB/s 12209860 B/op 262 allocs/op BenchmarkWriteColumn/float32_not_nullable-10 519 2234718 ns/op 1876.88 MB/s 5452627 B/op 1263 allocs/op BenchmarkWriteColumn/float32_nullable-10 56 23423793 ns/op 179.06 MB/s 12057540 B/op 1272 allocs/op BenchmarkWriteColumn/float64_not_nullable-10 416 2761247 ns/op 3037.98 MB/s 5507068 B/op 1292 allocs/op BenchmarkWriteColumn/float64_nullable-10 51 25767881 ns/op 325.55 MB/s 12059614 B/op 1285 allocs/op PASS ok github.com/apache/arrow/go/v17/parquet/pqarrow 10.592s ``` After: ``` $ go test -test.run='^$' -test.bench='^BenchmarkWriteColumn$' -benchmem ./parquet/pqarrow/... 
goos: linux
goarch: arm64
pkg: github.com/apache/arrow/go/v17/parquet/pqarrow
BenchmarkWriteColumn/int32_not_nullable-10     1196   959528 ns/op   4371.22 MB/s   5420349 B/op   238 allocs/op
BenchmarkWriteColumn/int32_nullable-10           51  23017598 ns/op   182.22 MB/s  14138480 B/op   248 allocs/op
BenchmarkWriteColumn/int64_not_nullable-10      690   1671710 ns/op  5017.98 MB/s   5419878 B/op   263 allocs/op
BenchmarkWriteColumn/int64_nullable-10           50  23196051 ns/op   361.64 MB/s  13728465 B/op   261 allocs/op
BenchmarkWriteColumn/float32_not_nullable-10    540   2185075 ns/op  1919.52 MB/s   5459392 B/op  1261 allocs/op
BenchmarkWriteColumn/float32_nullable-10         54  21796783 ns/op   192.43 MB/s  14150622 B/op  1271 allocs/op
BenchmarkWriteColumn/float64_not_nullable-10    418   2708292 ns/op  3097.38 MB/s   5455095 B/op  1290 allocs/op
BenchmarkWriteColumn/float64_nullable-10         51  22174952 ns/op   378.29 MB/s  14142791 B/op  1283 allocs/op
PASS
ok   github.com/apache/arrow/go/v17/parquet/pqarrow  10.210s
```

* GitHub Issue: #41541
---
 go/arrow/compute/utils.go             | 6 +++---
 go/parquet/internal/encoding/types.go | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/go/arrow/compute/utils.go b/go/arrow/compute/utils.go
index b20688539f146..899fe4cfbf4cc 100644
--- a/go/arrow/compute/utils.go
+++ b/go/arrow/compute/utils.go
@@ -21,7 +21,6 @@ package compute
 import (
 	"fmt"
 	"io"
-	"math"
 	"time"
 
 	"github.com/apache/arrow/go/v17/arrow"
@@ -30,6 +29,7 @@ import (
 	"github.com/apache/arrow/go/v17/arrow/compute/internal/kernels"
 	"github.com/apache/arrow/go/v17/arrow/internal/debug"
 	"github.com/apache/arrow/go/v17/arrow/memory"
+	"github.com/apache/arrow/go/v17/internal/utils"
 	"golang.org/x/xerrors"
 )
 
@@ -43,9 +43,9 @@ func (b *bufferWriteSeeker) Reserve(nbytes int) {
 	if b.buf == nil {
 		b.buf = memory.NewResizableBuffer(b.mem)
 	}
-	newCap := int(math.Max(float64(b.buf.Cap()), 256))
+	newCap := utils.Max(b.buf.Cap(), 256)
 	for newCap < b.pos+nbytes {
-		newCap = bitutil.NextPowerOf2(newCap)
+		newCap = bitutil.NextPowerOf2(b.pos + nbytes)
 	}
 	b.buf.Reserve(newCap)
 }
diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go
index 2d7a5d6b1d166..6962c95d4f818 100644
--- a/go/parquet/internal/encoding/types.go
+++ b/go/parquet/internal/encoding/types.go
@@ -187,7 +187,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) {
 
 	newCap := utils.Max(b.buf.Cap(), 256)
 	for newCap < b.pos+nbytes {
-		newCap = bitutil.NextPowerOf2(newCap)
+		newCap = bitutil.NextPowerOf2(b.pos + nbytes)
 	}
 	b.buf.Reserve(newCap)
 }
@@ -380,9 +380,9 @@ func (b *BufferWriter) Reserve(nbytes int) {
 	if b.buffer == nil {
 		b.buffer = memory.NewResizableBuffer(b.mem)
 	}
-	newCap := utils.Max(b.buffer.Cap()+b.offset, 256)
-	for newCap < b.pos+nbytes+b.offset {
-		newCap = bitutil.NextPowerOf2(newCap)
+	newCap := utils.Max(b.buffer.Cap(), 256)
+	for newCap < b.pos+nbytes {
+		newCap = bitutil.NextPowerOf2(b.pos + nbytes)
 	}
 	b.buffer.Reserve(newCap)
 }

From 2ae192b4aca6017a141dc52f78b5dec4ad2f2874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?=
Date: Wed, 10 Jul 2024 23:04:11 +0200
Subject: [PATCH 050/122] GH-43204: [CI][Packaging] Apply vcpkg patch to fix
 Thrift version (#43208)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

Currently our java-jars and some wheels jobs are failing because they download the wrong version of Apache Thrift, based on the 0.20.0 branch instead of the tag. That branch contains a new commit that makes the SHA validation fail.

### What changes are included in this PR?
Apply the Thrift patch that was applied on vcpkg here: https://github.com/microsoft/vcpkg/pull/39787 ### Are these changes tested? Via archery ### Are there any user-facing changes? No * GitHub Issue: #43204 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- ci/vcpkg/ports.patch | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 136b719ea72dd..67fb2a4a3ea76 100644 --- a/ci/vcpkg/ports.patch +++ b/ci/vcpkg/ports.patch @@ -65,3 +65,56 @@ index 000000000..e839c93a4 + } + + static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) { +diff --git a/ports/thrift/portfile.cmake b/ports/thrift/portfile.cmake +index 1501782..71d2147 100644 +--- a/ports/thrift/portfile.cmake ++++ b/ports/thrift/portfile.cmake +@@ -12,7 +12,7 @@ vcpkg_find_acquire_program(BISON) + vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO apache/thrift +- REF "${VERSION}" ++ REF "v${VERSION}" + SHA512 5e4ee9870b30fe5ba484d39781c435716f7f3903793dc8aae96594ca813b1a5a73363b84719038ca8fa3ab8ef0a419a28410d936ff7b3bbadf36fc085a6883ae + HEAD_REF master + PATCHES +diff --git a/ports/thrift/vcpkg.json b/ports/thrift/vcpkg.json +index 2d5a854..9ff49ec 100644 +--- a/ports/thrift/vcpkg.json ++++ b/ports/thrift/vcpkg.json +@@ -1,6 +1,7 @@ + { + "name": "thrift", + "version": "0.20.0", ++ "port-version": 1, + "description": "Apache Thrift is a software project spanning a variety of programming languages and use cases. Our goal is to make reliable, performant communication and data serialization across languages as efficient and seamless as possible.", + "homepage": "https://github.com/apache/thrift", + "license": "Apache-2.0", +diff --git a/versions/baseline.json b/versions/baseline.json +index c6ce736..9ad1d63 100644 +--- a/versions/baseline.json ++++ b/versions/baseline.json +@@ -8622,7 +8622,7 @@ + }, + "thrift": { + "baseline": "0.20.0", +- "port-version": 0 ++ "port-version": 1 + }, + "tidy-html5": { + "baseline": "5.8.0", +diff --git a/versions/t-/thrift.json b/versions/t-/thrift.json +index 3db38c5..7464bde 100644 +--- a/versions/t-/thrift.json ++++ b/versions/t-/thrift.json +@@ -1,5 +1,10 @@ + { + "versions": [ ++ { ++ "git-tree": "13757a6b05741cf3c9c39e3a1dcc5e5cd685e025", ++ "version": "0.20.0", ++ "port-version": 1 ++ }, + { + "git-tree": "6855be1ce96497811d4eb0a9879baf6cf1b3610c", + "version": "0.20.0", From 788c8f2c70a0460895a7811f529241343a65600e Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com> Date: Wed, 10 Jul 2024 17:41:25 -0400 Subject: [PATCH 051/122] GH-43186: [Go] Use auto-aligned atomic int64 for pqarrow pathbuilders (#43206) ### Rationale for this change Fixes: #43186 Improves 32-bit support for the pqarrow writer. We may want to push similar changes to other refcount implementations to more completely support running on 32-bit system. ### What changes are included in this PR? Update `pathBuilder` and `multipathLevelBuilder` refCounts to use `atomic.Int64` which is automatically aligned on 32-bit systems. ### Are these changes tested? The issue reproduces with existing tests when building for a 32-bit architecture, so no new tests were added. This PR adds a "test" step to the existing workflow for 386 architecture builds, currently limited to run the tests fixed in this PR. ### Are there any user-facing changes? 32-bit systems should no longer panic when writing parquet files. 
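For background, Go only guarantees 4-byte alignment for struct fields on 32-bit platforms, while the 64-bit `sync/atomic` operations require 8-byte alignment. Here is a minimal, self-contained Go sketch of the before/after pattern (the `pathBuilder`/`refCount` names come from the diff below; the field layout is simplified and illustrative):

```go
package main

import "sync/atomic"

// pathBuilderOld sketches the previous layout: a plain int64 counter placed
// after other fields may land at a 4-byte-aligned offset on 386, so the
// atomic add below can fault there.
type pathBuilderOld struct {
	paths    []string // placeholder field, illustrative only
	refCount int64
}

func (p *pathBuilderOld) Retain() { atomic.AddInt64(&p.refCount, 1) }

// pathBuilderNew sketches the fix: atomic.Int64 (Go 1.19+) is always 8-byte
// aligned regardless of its position in the struct.
type pathBuilderNew struct {
	paths    []string // placeholder field, illustrative only
	refCount atomic.Int64
}

func (p *pathBuilderNew) Retain() { p.refCount.Add(1) }

func main() {
	b := &pathBuilderNew{}
	b.Retain()
}
```

Because `atomic.Int64` carries its own alignment guarantee, the counter stays safe no matter where it sits in the struct.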
* GitHub Issue: #43186 --- .github/workflows/go.yml | 13 ++++++----- go/internal/utils/ref_count.go | 26 ++++++++++++++++++++++ go/parquet/internal/bmi/bitmap_bmi2_386.go | 25 +++++++++++++++++++++ go/parquet/pqarrow/path_builder.go | 17 +++++++------- 4 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 go/internal/utils/ref_count.go create mode 100644 go/parquet/internal/bmi/bitmap_bmi2_386.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0d32628859fa0..c247a89128b34 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -168,8 +168,8 @@ jobs: python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python3 ci/scripts/go_bench_adapt.py - build386: - name: Go Cross-build for 386 + build_test_386: + name: Go Cross-build and test for 386 runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 20 @@ -188,9 +188,12 @@ jobs: cache: true cache-dependency-path: go/go.sum - name: Run build - run: | - cd go - GOARCH=386 go build ./... + run: GOARCH=386 go build ./... + working-directory: ./go + - name: Run test + # WIP refactor, only tests in the specified dirs have been fixed + run: GOARCH=386 go test ./parquet/file/... + working-directory: ./go docker_cgo: name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO diff --git a/go/internal/utils/ref_count.go b/go/internal/utils/ref_count.go new file mode 100644 index 0000000000000..9b85f75b14363 --- /dev/null +++ b/go/internal/utils/ref_count.go @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import "sync/atomic" + +// NewRefCount creates a new atomic counter set to the specified initial value. +func NewRefCount(initial int64) *atomic.Int64 { + var val atomic.Int64 + val.Store(initial) + return &val +} diff --git a/go/parquet/internal/bmi/bitmap_bmi2_386.go b/go/parquet/internal/bmi/bitmap_bmi2_386.go new file mode 100644 index 0000000000000..60f898f6bd557 --- /dev/null +++ b/go/parquet/internal/bmi/bitmap_bmi2_386.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bmi + +func init() { + funclist.extractBits = extractBitsGo + funclist.gtbitmap = greaterThanBitmapGo +} diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go index 13f2beca024f1..c28072afe2c24 100644 --- a/go/parquet/pqarrow/path_builder.go +++ b/go/parquet/pqarrow/path_builder.go @@ -25,6 +25,7 @@ import ( "github.com/apache/arrow/go/v17/arrow/array" "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v17/internal/utils" "github.com/apache/arrow/go/v17/parquet/internal/encoding" "golang.org/x/xerrors" ) @@ -301,15 +302,15 @@ type pathBuilder struct { paths []pathInfo nullableInParent bool - refCount int64 + refCount *atomic.Int64 } func (p *pathBuilder) Retain() { - atomic.AddInt64(&p.refCount, 1) + p.refCount.Add(1) } func (p *pathBuilder) Release() { - if atomic.AddInt64(&p.refCount, -1) == 0 { + if p.refCount.Add(-1) == 0 { for idx := range p.paths { p.paths[idx].primitiveArr.Release() p.paths[idx].primitiveArr = nil @@ -498,15 +499,15 @@ type multipathLevelBuilder struct { data arrow.ArrayData builder pathBuilder - refCount int64 + refCount *atomic.Int64 } func (m *multipathLevelBuilder) Retain() { - atomic.AddInt64(&m.refCount, 1) + m.refCount.Add(1) } func (m *multipathLevelBuilder) Release() { - if atomic.AddInt64(&m.refCount, -1) == 0 { + if m.refCount.Add(-1) == 0 { m.data.Release() m.data = nil m.builder.Release() @@ -516,10 +517,10 @@ func (m *multipathLevelBuilder) Release() { func newMultipathLevelBuilder(arr arrow.Array, fieldNullable bool) (*multipathLevelBuilder, error) { ret := &multipathLevelBuilder{ - refCount: 1, + refCount: utils.NewRefCount(1), rootRange: elemRange{int64(0), int64(arr.Data().Len())}, data: arr.Data(), - builder: pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: 1}, + builder: pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: utils.NewRefCount(1)}, } if err := ret.builder.Visit(arr); err != nil { return nil, err From 84df3438e39b470ec9bfe77e682a14bda07b0921 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 10 Jul 2024 15:55:00 -0700 Subject: [PATCH 052/122] GH-42240: [R] Fix crash in ParquetFileWriter$WriteTable and add WriteBatch (#42241) ### Rationale for this change See https://github.com/apache/arrow/issues/42240. ### What changes are included in this PR? - Fixes a crash in `ParquetFileWriter$WriteTable` by asserting the class of what's passed in and stopping if it's not a `Table` - Since I was already there, added `WriteBatch` to match [`pyarrow.parquet.ParquetWriter.write_batch`](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.write_batch) which is just a convenience - Adds a test for the behavior of trying to write to a closed sink - Bumps the minimum Arrow C++ test version we test the R package with on CI from 13 to 15 - Removes one ARROW_VERSION_MAJOR >= 15 guard ### Are these changes tested? Yes. ### Are there any user-facing changes? 
New method on ParquetFileWriter (WriteBatch). * GitHub Issue: #42240 Authored-by: Bryce Mecum Signed-off-by: Jonathan Keane --- .github/workflows/r.yml | 2 +- r/NEWS.md | 1 + r/R/parquet.R | 7 +++++++ r/man/ParquetFileWriter.Rd | 1 + r/src/r_to_arrow.cpp | 9 --------- r/tests/testthat/test-parquet.R | 28 ++++++++++++++++++++++++++++ r/tools/check-versions.R | 4 ++-- r/tools/test-check-versions.R | 10 +++++++++- 8 files changed, 49 insertions(+), 13 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 6bd940f806775..e8f57db99c28c 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -62,7 +62,7 @@ jobs: strategy: matrix: include: - - cpp_version: "13.0.0" + - cpp_version: "15.0.2" steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 diff --git a/r/NEWS.md b/r/NEWS.md index 1e8a480ef5f65..c2690e6248dbc 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -24,6 +24,7 @@ * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. * The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) * R metadata, stored in the Arrow schema to support round-tripping data between R and Arrow/Parquet, is now serialized and deserialized more strictly. This makes it safer to load data from files from unknown sources into R data.frames. (#41969) +* The minimum version of the Arrow C++ library the Arrow R package can be built with has been bumped to 15.0.0 (#42241) # arrow 16.1.0 diff --git a/r/R/parquet.R b/r/R/parquet.R index 0ee6c62601c1d..88ce1c77128f7 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -419,6 +419,7 @@ ParquetWriterProperties$create <- function(column_names, #' @section Methods: #' #' - `WriteTable` Write a [Table] to `sink` +#' - `WriteBatch` Write a [RecordBatch] to `sink` #' - `Close` Close the writer. Note: does not close the `sink`. #' [arrow::io::OutputStream][OutputStream] has its own `close()` method. #' @@ -428,8 +429,14 @@ ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject, public = list( WriteTable = function(table, chunk_size) { + assert_is(table, "Table") parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) }, + WriteBatch = function(batch, ...) { + assert_is(batch, "RecordBatch") + table <- Table$create(batch) + self$WriteTable(table, ...) + }, Close = function() parquet___arrow___FileWriter__Close(self) ) ) diff --git a/r/man/ParquetFileWriter.Rd b/r/man/ParquetFileWriter.Rd index f36e85ab6c4ed..5779e574d46b1 100644 --- a/r/man/ParquetFileWriter.Rd +++ b/r/man/ParquetFileWriter.Rd @@ -24,6 +24,7 @@ takes the following arguments: \itemize{ \item \code{WriteTable} Write a \link{Table} to \code{sink} +\item \code{WriteBatch} Write a \link{RecordBatch} to \code{sink} \item \code{Close} Close the writer. Note: does not close the \code{sink}. \link[=OutputStream]{arrow::io::OutputStream} has its own \code{close()} method. 
} diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index a81210f0ad914..d2db11e14a787 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1050,7 +1050,6 @@ class RDictionaryConverter> template struct RConverterTrait; -#if ARROW_VERSION_MAJOR >= 15 template struct RConverterTrait< T, enable_if_t::value && !is_interval_type::value && @@ -1062,14 +1061,6 @@ template struct RConverterTrait> { // not implemented }; -#else -template -struct RConverterTrait< - T, enable_if_t::value && !is_interval_type::value && - !is_extension_type::value>> { - using type = RPrimitiveConverter; -}; -#endif template struct RConverterTrait> { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index f2359116fdaf1..cc57022600f8d 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -530,3 +530,31 @@ test_that("thrift string and container size can be specified when reading Parque data <- reader_container$ReadTable() expect_identical(collect.ArrowTabular(data), example_data) }) + +test_that("We can use WriteBatch on ParquetFileWriter", { + tf <- tempfile() + on.exit(unlink(tf)) + sink <- FileOutputStream$create(tf) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + + batch <- RecordBatch$create(data.frame(a = 1:10)) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$Close() + + tbl <- read_parquet(tf) + expect_equal(nrow(tbl), 30) +}) + +test_that("WriteBatch on ParquetFileWriter errors when called on closed sink", { + sink <- FileOutputStream$create(tempfile()) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + writer$Close() + batch <- RecordBatch$create(data.frame(a = 1:10)) + expect_error(writer$WriteBatch(batch, chunk_size = 10), "Operation on closed file") +}) diff --git a/r/tools/check-versions.R b/r/tools/check-versions.R index 34b2ef680c547..ea7fe93c52471 100644 --- a/r/tools/check-versions.R +++ b/r/tools/check-versions.R @@ -24,10 +24,10 @@ release_version_supported <- function(r_version, cpp_version) { r_version <- package_version(r_version) cpp_version <- package_version(cpp_version) major <- function(x) as.numeric(x[1, 1]) - minimum_cpp_version <- package_version("13.0.0") + minimum_cpp_version <- package_version("15.0.0") allow_mismatch <- identical(tolower(Sys.getenv("ARROW_R_ALLOW_CPP_VERSION_MISMATCH", "false")), "true") - # If we allow a version mismatch we still need to cover the minimum version (13.0.0 for now) + # If we allow a version mismatch we still need to cover the minimum version (15.0.0 for now) # we don't allow newer C++ versions as new features without additional feature gates are likely to # break the R package version_valid <- cpp_version >= minimum_cpp_version && major(cpp_version) <= major(r_version) diff --git a/r/tools/test-check-versions.R b/r/tools/test-check-versions.R index f558648bed1e3..14c0bee3fd88a 100644 --- a/r/tools/test-check-versions.R +++ b/r/tools/test-check-versions.R @@ -61,16 +61,24 @@ test_that("check_versions without mismatch", { test_that("check_versions with mismatch", { withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "false")) + expect_true( + release_version_supported("15.0.0", "15.0.0") + ) + expect_false( 
release_version_supported("15.0.0", "13.0.0") ) withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "true")) - expect_true( + expect_false( release_version_supported("15.0.0", "13.0.0") ) + expect_true( + release_version_supported("16.0.0", "15.0.0") + ) + expect_false( release_version_supported("15.0.0", "16.0.0") ) From 797ca30c2bc634f3352d44b1b9bf0bc017a3e426 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 11 Jul 2024 07:32:17 +0200 Subject: [PATCH 053/122] MINOR: [Java] Remove superfluous add-source execution (#43216) ### Rationale for this change protobuf plugin already adds generated protobuf classes to the list of source directories, so remove the superfluous build-helper:add-source execution ### Are these changes tested? Yes via CI/CD ### Are there any user-facing changes? No Authored-by: Laurent Goujon Signed-off-by: David Li --- java/flight/flight-core/pom.xml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index a60631dae01fa..5f82de2724b1d 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -261,25 +261,6 @@ under the License. - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-generated-sources-to-classpath - - add-source - - generate-sources - - - ${project.build.directory}/generated-sources/protobuf - - - - - maven-assembly-plugin From c777ac804789cea9682256f1a84200d42c0f2a0f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 21:34:15 +0900 Subject: [PATCH 054/122] MINOR: [Java] Bump checker.framework.version from 3.44.0 to 3.45.0 in /java (#43178) Bumps `checker.framework.version` from 3.44.0 to 3.45.0. Updates `org.checkerframework:checker-qual` from 3.44.0 to 3.45.0
Release notes

Sourced from org.checkerframework:checker-qual's releases.

Checker Framework 3.45.0

Version 3.45.0 (July 1, 2024)

Implementation details:

Added a Tree argument to AnnotatedTypes.adaptParameters()

Deprecated methods:

  • TreeUtils.isVarArgs() => isVarargsCall()
  • TreeUtils.isVarArgMethodCall() => isVarargsCall()

Closed issues:

#152, #5575, #6630, #6641, #6648, #6676.

Commits
  • 9723ea7 new release 3.45.0
  • 83a8a55 Update for release.
  • 8d50b59 Update dependency com.amazonaws:aws-java-sdk-bom to v1.12.753 (#6690)
  • e3a0b54 Update dependency org.projectlombok:lombok to v1.18.34 (#6689)
  • 71f21f1 Update to Stubparser version 3.26.1
  • 6147c82 Remove references to the defunct Object Construction Checker
  • 936a6d0 Fix typo
  • 4b236c1 Project: replace JavaParser by javac (#6678)
  • 6cc0cd3 Non-Empty Checker (#6679)
  • 5c98838 Add a complete example of a resource wrapper type to the manual (#6680)
  • Additional commits viewable in compare view

Updates `org.checkerframework:checker` from 3.44.0 to 3.45.0 (same upstream release; its release notes and commits match those of `org.checkerframework:checker-qual` above).
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 2cc3efc29ff13..b38e3382d4d24 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -105,7 +105,7 @@ under the License. 2.28.0 5.11.0 5.2.0 - 3.44.0 + 3.45.0 none -Xdoclint:none From 6e438e64eac19af0a1c66ceb809d4ae2d18d900f Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 12 Jul 2024 03:50:53 +1200 Subject: [PATCH 055/122] GH-43142: [C++][Parquet] Refactor Encryptor API to use arrow::util::span instead of raw pointers (#43195) ### Rationale for this change See #43142. This is a follow up to #43071 which refactored the Decryptor API and added extra checks to prevent segfaults. This PR makes similar changes to the Encryptor API for consistency and better maintainability. ### What changes are included in this PR? * Change `AesEncryptor::Encrypt` and `Encryptor::Encrypt` to use `arrow::util::span` instead of raw pointers * Replace the `AesEncryptor::CiphertextSizeDelta` method with a `CiphertextLength` method that checks for overflow and abstracts the size difference behaviour away from consumer code for improved readability. ### Are these changes tested? * This is mostly a refactoring of existing code so is covered by existing tests. ### Are there any user-facing changes? No * GitHub Issue: #43142 Lead-authored-by: Adam Reeve Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/parquet/column_writer.cc | 15 +- .../parquet/encryption/encryption_internal.cc | 231 +++++++++++------- .../parquet/encryption/encryption_internal.h | 18 +- .../encryption/encryption_internal_nossl.cc | 18 +- .../encryption/encryption_internal_test.cc | 20 +- .../encryption/internal_file_encryptor.cc | 11 +- .../encryption/internal_file_encryptor.h | 6 +- .../encryption/key_toolkit_internal.cc | 15 +- cpp/src/parquet/metadata.cc | 34 +-- cpp/src/parquet/thrift_internal.h | 14 +- 10 files changed, 218 insertions(+), 164 deletions(-) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index ac1c3ea2e3e20..c9f6e482981c0 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -303,9 +303,10 @@ class SerializedPageWriter : public PageWriter { if (data_encryptor_.get()) { UpdateEncryption(encryption::kDictionaryPage); PARQUET_THROW_NOT_OK(encryption_buffer_->Resize( - data_encryptor_->CiphertextSizeDelta() + output_data_len, false)); - output_data_len = data_encryptor_->Encrypt(compressed_data->data(), output_data_len, - encryption_buffer_->mutable_data()); + data_encryptor_->CiphertextLength(output_data_len), false)); + output_data_len = + data_encryptor_->Encrypt(compressed_data->span_as(), + encryption_buffer_->mutable_span_as()); output_data_buffer = encryption_buffer_->data(); } @@ -395,11 +396,11 @@ class SerializedPageWriter : public PageWriter { if (data_encryptor_.get()) { PARQUET_THROW_NOT_OK(encryption_buffer_->Resize( - data_encryptor_->CiphertextSizeDelta() + output_data_len, false)); + data_encryptor_->CiphertextLength(output_data_len), false)); UpdateEncryption(encryption::kDataPage); - output_data_len = data_encryptor_->Encrypt(compressed_data->data(), - static_cast(output_data_len), - encryption_buffer_->mutable_data()); + output_data_len = + data_encryptor_->Encrypt(compressed_data->span_as(), + encryption_buffer_->mutable_span_as()); output_data_buffer = encryption_buffer_->data(); } diff --git 
a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index c5d2d1728ba1e..6168dd2a9bd61 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -58,12 +58,12 @@ class AesEncryptor::AesEncryptorImpl { ~AesEncryptorImpl() { WipeOut(); } - int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext); + int Encrypt(span plaintext, span key, + span aad, span ciphertext); - int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - const uint8_t* nonce, uint8_t* encrypted_footer); + int SignedFooterEncrypt(span footer, span key, + span aad, span nonce, + span encrypted_footer); void WipeOut() { if (nullptr != ctx_) { EVP_CIPHER_CTX_free(ctx_); @@ -71,7 +71,21 @@ class AesEncryptor::AesEncryptorImpl { } } - int ciphertext_size_delta() { return ciphertext_size_delta_; } + [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const { + if (plaintext_len < 0) { + std::stringstream ss; + ss << "Negative plaintext length " << plaintext_len; + throw ParquetException(ss.str()); + } else if (plaintext_len > + std::numeric_limits::max() - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext length " << plaintext_len << " plus ciphertext size delta " + << ciphertext_size_delta_ << " overflows int32"; + throw ParquetException(ss.str()); + } + + return static_cast(plaintext_len + ciphertext_size_delta_); + } private: EVP_CIPHER_CTX* ctx_; @@ -80,12 +94,12 @@ class AesEncryptor::AesEncryptorImpl { int ciphertext_size_delta_; int length_buffer_length_; - int GcmEncrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* nonce, const uint8_t* aad, int aad_len, - uint8_t* ciphertext); + int GcmEncrypt(span plaintext, span key, + span nonce, span aad, + span ciphertext); - int CtrEncrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* nonce, uint8_t* ciphertext); + int CtrEncrypt(span plaintext, span key, + span nonce, span ciphertext); }; AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int key_len, @@ -137,12 +151,21 @@ AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, int } } -int AesEncryptor::AesEncryptorImpl::SignedFooterEncrypt( - const uint8_t* footer, int footer_len, const uint8_t* key, int key_len, - const uint8_t* aad, int aad_len, const uint8_t* nonce, uint8_t* encrypted_footer) { - if (key_length_ != key_len) { +int AesEncryptor::AesEncryptorImpl::SignedFooterEncrypt(span footer, + span key, + span aad, + span nonce, + span encrypted_footer) { + if (static_cast(key_length_) != key.size()) { + std::stringstream ss; + ss << "Wrong key length " << key.size() << ". Should be " << key_length_; + throw ParquetException(ss.str()); + } + + if (encrypted_footer.size() != footer.size() + ciphertext_size_delta_) { std::stringstream ss; - ss << "Wrong key length " << key_len << ". 
Should be " << key_length_; + ss << "Encrypted footer buffer length " << encrypted_footer.size() + << " does not match expected length " << (footer.size() + ciphertext_size_delta_); throw ParquetException(ss.str()); } @@ -150,72 +173,85 @@ int AesEncryptor::AesEncryptorImpl::SignedFooterEncrypt( throw ParquetException("Must use AES GCM (metadata) encryptor"); } - return GcmEncrypt(footer, footer_len, key, key_len, nonce, aad, aad_len, - encrypted_footer); + return GcmEncrypt(footer, key, nonce, aad, encrypted_footer); } -int AesEncryptor::AesEncryptorImpl::Encrypt(const uint8_t* plaintext, int plaintext_len, - const uint8_t* key, int key_len, - const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { - if (key_length_ != key_len) { +int AesEncryptor::AesEncryptorImpl::Encrypt(span plaintext, + span key, + span aad, + span ciphertext) { + if (static_cast(key_length_) != key.size()) { std::stringstream ss; - ss << "Wrong key length " << key_len << ". Should be " << key_length_; + ss << "Wrong key length " << key.size() << ". Should be " << key_length_; throw ParquetException(ss.str()); } - uint8_t nonce[kNonceLength]; - memset(nonce, 0, kNonceLength); + if (ciphertext.size() != plaintext.size() + ciphertext_size_delta_) { + std::stringstream ss; + ss << "Ciphertext buffer length " << ciphertext.size() + << " does not match expected length " + << (plaintext.size() + ciphertext_size_delta_); + throw ParquetException(ss.str()); + } + + std::array nonce{}; // Random nonce - RAND_bytes(nonce, sizeof(nonce)); + RAND_bytes(nonce.data(), kNonceLength); if (kGcmMode == aes_mode_) { - return GcmEncrypt(plaintext, plaintext_len, key, key_len, nonce, aad, aad_len, - ciphertext); + return GcmEncrypt(plaintext, key, nonce, aad, ciphertext); } - return CtrEncrypt(plaintext, plaintext_len, key, key_len, nonce, ciphertext); + return CtrEncrypt(plaintext, key, nonce, ciphertext); } -int AesEncryptor::AesEncryptorImpl::GcmEncrypt(const uint8_t* plaintext, - int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* nonce, - const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { +int AesEncryptor::AesEncryptorImpl::GcmEncrypt(span plaintext, + span key, + span nonce, + span aad, + span ciphertext) { int len; int ciphertext_len; - uint8_t tag[kGcmTagLength]; - memset(tag, 0, kGcmTagLength); + std::array tag{}; + + if (nonce.size() != static_cast(kNonceLength)) { + std::stringstream ss; + ss << "Invalid nonce size " << nonce.size() << ", expected " << kNonceLength; + throw ParquetException(ss.str()); + } // Setting key and IV (nonce) - if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key, nonce)) { + if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce.data())) { throw ParquetException("Couldn't set key and nonce"); } // Setting additional authenticated data - if ((nullptr != aad) && (1 != EVP_EncryptUpdate(ctx_, nullptr, &len, aad, aad_len))) { + if ((!aad.empty()) && (1 != EVP_EncryptUpdate(ctx_, nullptr, &len, aad.data(), + static_cast(aad.size())))) { throw ParquetException("Couldn't set AAD"); } // Encryption - if (1 != EVP_EncryptUpdate(ctx_, ciphertext + length_buffer_length_ + kNonceLength, - &len, plaintext, plaintext_len)) { + if (1 != + EVP_EncryptUpdate(ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength, + &len, plaintext.data(), static_cast(plaintext.size()))) { throw ParquetException("Failed encryption update"); } ciphertext_len = len; // Finalization - if (1 != EVP_EncryptFinal_ex( - ctx_, ciphertext + length_buffer_length_ + kNonceLength + len, &len)) 
{ + if (1 != + EVP_EncryptFinal_ex( + ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength + len, &len)) { throw ParquetException("Failed encryption finalization"); } ciphertext_len += len; // Getting the tag - if (1 != EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_GET_TAG, kGcmTagLength, tag)) { + if (1 != EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_GET_TAG, kGcmTagLength, tag.data())) { throw ParquetException("Couldn't get AES-GCM tag"); } @@ -227,45 +263,53 @@ int AesEncryptor::AesEncryptorImpl::GcmEncrypt(const uint8_t* plaintext, ciphertext[1] = static_cast(0xff & (buffer_size >> 8)); ciphertext[0] = static_cast(0xff & (buffer_size)); } - std::copy(nonce, nonce + kNonceLength, ciphertext + length_buffer_length_); - std::copy(tag, tag + kGcmTagLength, - ciphertext + length_buffer_length_ + kNonceLength + ciphertext_len); + std::copy(nonce.begin(), nonce.begin() + kNonceLength, + ciphertext.begin() + length_buffer_length_); + std::copy(tag.begin(), tag.end(), + ciphertext.begin() + length_buffer_length_ + kNonceLength + ciphertext_len); return length_buffer_length_ + buffer_size; } -int AesEncryptor::AesEncryptorImpl::CtrEncrypt(const uint8_t* plaintext, - int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* nonce, - uint8_t* ciphertext) { +int AesEncryptor::AesEncryptorImpl::CtrEncrypt(span plaintext, + span key, + span nonce, + span ciphertext) { int len; int ciphertext_len; + if (nonce.size() != static_cast(kNonceLength)) { + std::stringstream ss; + ss << "Invalid nonce size " << nonce.size() << ", expected " << kNonceLength; + throw ParquetException(ss.str()); + } + // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial // counter field. // The first 31 bits of the initial counter field are set to 0, the last bit // is set to 1. 
- uint8_t iv[kCtrIvLength]; - memset(iv, 0, kCtrIvLength); - std::copy(nonce, nonce + kNonceLength, iv); + std::array iv{}; + std::copy(nonce.begin(), nonce.begin() + kNonceLength, iv.begin()); iv[kCtrIvLength - 1] = 1; // Setting key and IV - if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key, iv)) { + if (1 != EVP_EncryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv.data())) { throw ParquetException("Couldn't set key and IV"); } // Encryption - if (1 != EVP_EncryptUpdate(ctx_, ciphertext + length_buffer_length_ + kNonceLength, - &len, plaintext, plaintext_len)) { + if (1 != + EVP_EncryptUpdate(ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength, + &len, plaintext.data(), static_cast(plaintext.size()))) { throw ParquetException("Failed encryption update"); } ciphertext_len = len; // Finalization - if (1 != EVP_EncryptFinal_ex( - ctx_, ciphertext + length_buffer_length_ + kNonceLength + len, &len)) { + if (1 != + EVP_EncryptFinal_ex( + ctx_, ciphertext.data() + length_buffer_length_ + kNonceLength + len, &len)) { throw ParquetException("Failed encryption finalization"); } @@ -279,29 +323,29 @@ int AesEncryptor::AesEncryptorImpl::CtrEncrypt(const uint8_t* plaintext, ciphertext[1] = static_cast(0xff & (buffer_size >> 8)); ciphertext[0] = static_cast(0xff & (buffer_size)); } - std::copy(nonce, nonce + kNonceLength, ciphertext + length_buffer_length_); + std::copy(nonce.begin(), nonce.begin() + kNonceLength, + ciphertext.begin() + length_buffer_length_); return length_buffer_length_ + buffer_size; } AesEncryptor::~AesEncryptor() {} -int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len, - const uint8_t* key, int key_len, const uint8_t* aad, - int aad_len, const uint8_t* nonce, - uint8_t* encrypted_footer) { - return impl_->SignedFooterEncrypt(footer, footer_len, key, key_len, aad, aad_len, nonce, - encrypted_footer); +int AesEncryptor::SignedFooterEncrypt(span footer, span key, + span aad, span nonce, + span encrypted_footer) { + return impl_->SignedFooterEncrypt(footer, key, aad, nonce, encrypted_footer); } void AesEncryptor::WipeOut() { impl_->WipeOut(); } -int AesEncryptor::CiphertextSizeDelta() { return impl_->ciphertext_size_delta(); } +int32_t AesEncryptor::CiphertextLength(int64_t plaintext_len) const { + return impl_->CiphertextLength(plaintext_len); +} -int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { - return impl_->Encrypt(plaintext, plaintext_len, key, key_len, aad, aad_len, ciphertext); +int AesEncryptor::Encrypt(span plaintext, span key, + span aad, span ciphertext) { + return impl_->Encrypt(plaintext, key, aad, ciphertext); } AesEncryptor::AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata, @@ -341,6 +385,11 @@ class AesDecryptor::AesDecryptorImpl { std::stringstream ss; ss << "Negative plaintext length " << plaintext_len; throw ParquetException(ss.str()); + } else if (plaintext_len > std::numeric_limits::max() - ciphertext_size_delta_) { + std::stringstream ss; + ss << "Plaintext length " << plaintext_len << " plus ciphertext size delta " + << ciphertext_size_delta_ << " overflows int32"; + throw ParquetException(ss.str()); } return plaintext_len + ciphertext_size_delta_; } @@ -481,13 +530,16 @@ int AesDecryptor::AesDecryptorImpl::GetCiphertextLength( } // Extract ciphertext length - int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) | - ((ciphertext[2] & 0xff) << 16) | - ((ciphertext[1] & 0xff) << 8) 
| ((ciphertext[0] & 0xff)); + uint32_t written_ciphertext_len = (static_cast(ciphertext[3]) << 24) | + (static_cast(ciphertext[2]) << 16) | + (static_cast(ciphertext[1]) << 8) | + (static_cast(ciphertext[0])); - if (written_ciphertext_len < 0) { + if (written_ciphertext_len > + static_cast(std::numeric_limits::max() - length_buffer_length_)) { std::stringstream ss; - ss << "Negative ciphertext length " << written_ciphertext_len; + ss << "Written ciphertext length " << written_ciphertext_len + << " plus length buffer length " << length_buffer_length_ << " overflows int"; throw ParquetException(ss.str()); } else if (ciphertext.size() < static_cast(written_ciphertext_len) + length_buffer_length_) { @@ -499,11 +551,11 @@ int AesDecryptor::AesDecryptorImpl::GetCiphertextLength( throw ParquetException(ss.str()); } - return written_ciphertext_len + length_buffer_length_; + return static_cast(written_ciphertext_len) + length_buffer_length_; } else { - if (ciphertext.size() > static_cast(std::numeric_limits::max())) { + if (ciphertext.size() > static_cast(std::numeric_limits::max())) { std::stringstream ss; - ss << "Ciphertext buffer length " << ciphertext.size() << " overflows int32"; + ss << "Ciphertext buffer length " << ciphertext.size() << " overflows int"; throw ParquetException(ss.str()); } return static_cast(ciphertext.size()); @@ -517,10 +569,8 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(span ciphertext, int len; int plaintext_len; - uint8_t tag[kGcmTagLength]; - memset(tag, 0, kGcmTagLength); - uint8_t nonce[kNonceLength]; - memset(nonce, 0, kNonceLength); + std::array tag{}; + std::array nonce{}; int ciphertext_len = GetCiphertextLength(ciphertext); @@ -540,12 +590,12 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(span ciphertext, // Extracting IV and tag std::copy(ciphertext.begin() + length_buffer_length_, - ciphertext.begin() + length_buffer_length_ + kNonceLength, nonce); + ciphertext.begin() + length_buffer_length_ + kNonceLength, nonce.begin()); std::copy(ciphertext.begin() + ciphertext_len - kGcmTagLength, - ciphertext.begin() + ciphertext_len, tag); + ciphertext.begin() + ciphertext_len, tag.begin()); // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), nonce.data())) { throw ParquetException("Couldn't set key and IV"); } @@ -566,7 +616,7 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(span ciphertext, plaintext_len = len; // Checking the tag (authentication) - if (!EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_SET_TAG, kGcmTagLength, tag)) { + if (!EVP_CIPHER_CTX_ctrl(ctx_, EVP_CTRL_GCM_SET_TAG, kGcmTagLength, tag.data())) { throw ParquetException("Failed authentication"); } @@ -585,8 +635,7 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span ciphertext, int len; int plaintext_len; - uint8_t iv[kCtrIvLength]; - memset(iv, 0, kCtrIvLength); + std::array iv{}; int ciphertext_len = GetCiphertextLength(ciphertext); @@ -606,7 +655,7 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span ciphertext, // Extracting nonce std::copy(ciphertext.begin() + length_buffer_length_, - ciphertext.begin() + length_buffer_length_ + kNonceLength, iv); + ciphertext.begin() + length_buffer_length_ + kNonceLength, iv.begin()); // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial // counter field. 
// The first 31 bits of the initial counter field are set to 0, the last bit @@ -614,7 +663,7 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(span ciphertext, iv[kCtrIvLength - 1] = 1; // Setting key and IV - if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv)) { + if (1 != EVP_DecryptInit_ex(ctx_, nullptr, nullptr, key.data(), iv.data())) { throw ParquetException("Couldn't set key and IV"); } diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 2d5450553c16d..a9a17f1ab98e3 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -61,18 +61,22 @@ class PARQUET_EXPORT AesEncryptor { ~AesEncryptor(); - /// Size difference between plaintext and ciphertext, for this cipher. - int CiphertextSizeDelta(); + /// The size of the ciphertext, for this cipher and the specified plaintext length. + [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const; /// Encrypts plaintext with the key and aad. Key length is passed only for validation. /// If different from value in constructor, exception will be thrown. - int Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, uint8_t* ciphertext); + int Encrypt(::arrow::util::span plaintext, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span ciphertext); /// Encrypts plaintext footer, in order to compute footer signature (tag). - int SignedFooterEncrypt(const uint8_t* footer, int footer_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - const uint8_t* nonce, uint8_t* encrypted_footer); + int SignedFooterEncrypt(::arrow::util::span footer, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span nonce, + ::arrow::util::span encrypted_footer); void WipeOut(); diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index ed323c4aa6167..2f6cdc8200016 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -29,24 +29,26 @@ class AesEncryptor::AesEncryptorImpl {}; AesEncryptor::~AesEncryptor() {} -int AesEncryptor::SignedFooterEncrypt(const uint8_t* footer, int footer_len, - const uint8_t* key, int key_len, const uint8_t* aad, - int aad_len, const uint8_t* nonce, - uint8_t* encrypted_footer) { +int AesEncryptor::SignedFooterEncrypt(::arrow::util::span footer, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span nonce, + ::arrow::util::span encrypted_footer) { ThrowOpenSSLRequiredException(); return -1; } void AesEncryptor::WipeOut() { ThrowOpenSSLRequiredException(); } -int AesEncryptor::CiphertextSizeDelta() { +int32_t AesEncryptor::CiphertextLength(int64_t plaintext_len) const { ThrowOpenSSLRequiredException(); return -1; } -int AesEncryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, const uint8_t* key, - int key_len, const uint8_t* aad, int aad_len, - uint8_t* ciphertext) { +int AesEncryptor::Encrypt(::arrow::util::span plaintext, + ::arrow::util::span key, + ::arrow::util::span aad, + ::arrow::util::span ciphertext) { ThrowOpenSSLRequiredException(); return -1; } diff --git a/cpp/src/parquet/encryption/encryption_internal_test.cc b/cpp/src/parquet/encryption/encryption_internal_test.cc index cf7eeef4c6446..22e14663ea81f 100644 --- a/cpp/src/parquet/encryption/encryption_internal_test.cc +++ 
b/cpp/src/parquet/encryption/encryption_internal_test.cc @@ -37,14 +37,12 @@ class TestAesEncryption : public ::testing::Test { AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length); - int expected_ciphertext_len = - static_cast(plain_text_.size()) + encryptor.CiphertextSizeDelta(); + int32_t expected_ciphertext_len = + encryptor.CiphertextLength(static_cast(plain_text_.size())); std::vector ciphertext(expected_ciphertext_len, '\0'); - int ciphertext_length = - encryptor.Encrypt(str2bytes(plain_text_), static_cast(plain_text_.size()), - str2bytes(key_), static_cast(key_.size()), str2bytes(aad_), - static_cast(aad_.size()), ciphertext.data()); + int ciphertext_length = encryptor.Encrypt(str2span(plain_text_), str2span(key_), + str2span(aad_), ciphertext); ASSERT_EQ(ciphertext_length, expected_ciphertext_len); @@ -87,14 +85,12 @@ class TestAesEncryption : public ::testing::Test { AesEncryptor encryptor(cipher_type, key_length_, metadata, write_length); - int expected_ciphertext_len = - static_cast(plain_text_.size()) + encryptor.CiphertextSizeDelta(); + int32_t expected_ciphertext_len = + encryptor.CiphertextLength(static_cast(plain_text_.size())); std::vector ciphertext(expected_ciphertext_len, '\0'); - int ciphertext_length = - encryptor.Encrypt(str2bytes(plain_text_), static_cast(plain_text_.size()), - str2bytes(key_), static_cast(key_.size()), str2bytes(aad_), - static_cast(aad_.size()), ciphertext.data()); + int ciphertext_length = encryptor.Encrypt(str2span(plain_text_), str2span(key_), + str2span(aad_), ciphertext); AesDecryptor decryptor(cipher_type, key_length_, metadata, write_length); diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 15bf52b84dd1b..a423cc678cccb 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -31,12 +31,13 @@ Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& aad_(aad), pool_(pool) {} -int Encryptor::CiphertextSizeDelta() { return aes_encryptor_->CiphertextSizeDelta(); } +int32_t Encryptor::CiphertextLength(int64_t plaintext_len) const { + return aes_encryptor_->CiphertextLength(plaintext_len); +} -int Encryptor::Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext) { - return aes_encryptor_->Encrypt(plaintext, plaintext_len, str2bytes(key_), - static_cast(key_.size()), str2bytes(aad_), - static_cast(aad_.size()), ciphertext); +int Encryptor::Encrypt(::arrow::util::span plaintext, + ::arrow::util::span ciphertext) { + return aes_encryptor_->Encrypt(plaintext, str2span(key_), str2span(aad_), ciphertext); } // InternalFileEncryptor diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index 3cbe53500c2c5..41ffc6fd51943 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -43,8 +43,10 @@ class PARQUET_EXPORT Encryptor { void UpdateAad(const std::string& aad) { aad_ = aad; } ::arrow::MemoryPool* pool() { return pool_; } - int CiphertextSizeDelta(); - int Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext); + [[nodiscard]] int32_t CiphertextLength(int64_t plaintext_len) const; + + int Encrypt(::arrow::util::span plaintext, + ::arrow::util::span ciphertext); bool EncryptColumnMetaData( bool encrypted_footer, diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc 
b/cpp/src/parquet/encryption/key_toolkit_internal.cc index a3c7c996b130a..5d7925aa0318f 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -32,15 +32,14 @@ std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& m static_cast(master_key.size()), false, false /*write_length*/); - int encrypted_key_len = - static_cast(key_bytes.size()) + key_encryptor.CiphertextSizeDelta(); + int32_t encrypted_key_len = + key_encryptor.CiphertextLength(static_cast(key_bytes.size())); std::string encrypted_key(encrypted_key_len, '\0'); - encrypted_key_len = key_encryptor.Encrypt( - reinterpret_cast(key_bytes.data()), - static_cast(key_bytes.size()), - reinterpret_cast(master_key.data()), - static_cast(master_key.size()), reinterpret_cast(aad.data()), - static_cast(aad.size()), reinterpret_cast(&encrypted_key[0])); + ::arrow::util::span encrypted_key_span( + reinterpret_cast(&encrypted_key[0]), encrypted_key_len); + + encrypted_key_len = key_encryptor.Encrypt(str2span(key_bytes), str2span(master_key), + str2span(aad), encrypted_key_span); return ::arrow::util::base64_encode( ::std::string_view(encrypted_key.data(), encrypted_key_len)); diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index d7be50a6116bd..4ea3b05340d71 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -640,11 +640,13 @@ class FileMetaData::FileMetaDataImpl { uint32_t serialized_len = metadata_len_; ThriftSerializer serializer; serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data); + ::arrow::util::span serialized_data_span(serialized_data, + serialized_len); // encrypt with nonce - auto nonce = const_cast(reinterpret_cast(signature)); - auto tag = const_cast(reinterpret_cast(signature)) + - encryption::kNonceLength; + ::arrow::util::span nonce(reinterpret_cast(signature), + encryption::kNonceLength); + auto tag = reinterpret_cast(signature) + encryption::kNonceLength; std::string key = file_decryptor_->GetFooterKey(); std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); @@ -653,13 +655,11 @@ class FileMetaData::FileMetaDataImpl { file_decryptor_->algorithm(), static_cast(key.size()), true, false /*write_length*/, nullptr); - std::shared_ptr encrypted_buffer = std::static_pointer_cast( - AllocateBuffer(file_decryptor_->pool(), - aes_encryptor->CiphertextSizeDelta() + serialized_len)); + std::shared_ptr encrypted_buffer = AllocateBuffer( + file_decryptor_->pool(), aes_encryptor->CiphertextLength(serialized_len)); uint32_t encrypted_len = aes_encryptor->SignedFooterEncrypt( - serialized_data, serialized_len, str2bytes(key), static_cast(key.size()), - str2bytes(aad), static_cast(aad.size()), nonce, - encrypted_buffer->mutable_data()); + serialized_data_span, str2span(key), str2span(aad), nonce, + encrypted_buffer->mutable_span_as()); // Delete AES encryptor object. It was created only to verify the footer signature. 
aes_encryptor->WipeOut(); delete aes_encryptor; @@ -701,12 +701,12 @@ class FileMetaData::FileMetaDataImpl { uint8_t* serialized_data; uint32_t serialized_len; serializer.SerializeToBuffer(metadata_.get(), &serialized_len, &serialized_data); + ::arrow::util::span serialized_data_span(serialized_data, + serialized_len); // encrypt the footer key - std::vector encrypted_data(encryptor->CiphertextSizeDelta() + - serialized_len); - unsigned encrypted_len = - encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data()); + std::vector encrypted_data(encryptor->CiphertextLength(serialized_len)); + int encrypted_len = encryptor->Encrypt(serialized_data_span, encrypted_data); // write unencrypted footer PARQUET_THROW_NOT_OK(dst->Write(serialized_data, serialized_len)); @@ -1559,11 +1559,11 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { serializer.SerializeToBuffer(&column_chunk_->meta_data, &serialized_len, &serialized_data); + ::arrow::util::span serialized_data_span(serialized_data, + serialized_len); - std::vector encrypted_data(encryptor->CiphertextSizeDelta() + - serialized_len); - unsigned encrypted_len = - encryptor->Encrypt(serialized_data, serialized_len, encrypted_data.data()); + std::vector encrypted_data(encryptor->CiphertextLength(serialized_len)); + int encrypted_len = encryptor->Encrypt(serialized_data_span, encrypted_data); const char* temp = const_cast(reinterpret_cast(encrypted_data.data())); diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 4e4d7ed9837df..b21b0e07afba2 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -417,8 +417,8 @@ class ThriftDeserializer { throw ParquetException(ss.str()); } // decrypt - auto decrypted_buffer = std::static_pointer_cast(AllocateBuffer( - decryptor->pool(), decryptor->PlaintextLength(static_cast(clen)))); + auto decrypted_buffer = AllocateBuffer( + decryptor->pool(), decryptor->PlaintextLength(static_cast(clen))); ::arrow::util::span cipher_buf(buf, clen); uint32_t decrypted_buffer_len = decryptor->Decrypt(cipher_buf, decrypted_buffer->mutable_span_as()); @@ -525,13 +525,13 @@ class ThriftSerializer { } } - int64_t SerializeEncryptedObj(ArrowOutputStream* out, uint8_t* out_buffer, + int64_t SerializeEncryptedObj(ArrowOutputStream* out, const uint8_t* out_buffer, uint32_t out_length, Encryptor* encryptor) { - auto cipher_buffer = std::static_pointer_cast(AllocateBuffer( - encryptor->pool(), - static_cast(encryptor->CiphertextSizeDelta() + out_length))); + auto cipher_buffer = + AllocateBuffer(encryptor->pool(), encryptor->CiphertextLength(out_length)); + ::arrow::util::span out_span(out_buffer, out_length); int cipher_buffer_len = - encryptor->Encrypt(out_buffer, out_length, cipher_buffer->mutable_data()); + encryptor->Encrypt(out_span, cipher_buffer->mutable_span_as()); PARQUET_THROW_NOT_OK(out->Write(cipher_buffer->data(), cipher_buffer_len)); return static_cast(cipher_buffer_len); From e65c1e295d82c7076df484089a63fa3ba2bd55d1 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 11 Jul 2024 22:24:54 -0300 Subject: [PATCH 056/122] GH-43218: [C++] Resolve Abseil like any other dependency in the build system (#43219) ### Rationale for this change The workarounds around Abseil resolution don't seem necessary anymore and they don't work on all possible configurations of the build. ### What changes are included in this PR? 
Removal of the `ensure_absl` macro and adding a call to `resolve_dependency` when depending on the Google Cloud SDK (a GCS filesystem dependency) or gRPC (a flight dependency). ### Are these changes tested? Yes, by me trying different build configurations on my macOS and existing builds in CI. * GitHub Issue: #43218 Lead-authored-by: Felipe Oliveira Carvalho Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/docker/fedora-39-cpp.dockerfile | 3 +- cpp/CMakeLists.txt | 2 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 57 ++++++++++----------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 8ecaa6c3ca784..33d11823094ce 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -77,8 +77,7 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # PYARROW_TEST_GANDIVA=OFF: GH-39695: We need to make LLVM symbols visible in # Python process explicitly if we use LLVM 17 or later. -ENV absl_SOURCE=BUNDLED \ - ARROW_ACERO=ON \ +ENV ARROW_ACERO=ON \ ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2e2a4971840a8..8247043b8bf84 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -689,7 +689,7 @@ endif() if("${ARROW_TEST_LINKAGE}" STREQUAL "shared") if(ARROW_BUILD_TESTS AND NOT ARROW_BUILD_SHARED) - message(FATAL_ERROR "If using shared linkage for unit tests, must also \ + message(FATAL_ERROR "If using ARROW_TEST_LINKAGE=shared, must also \ pass ARROW_BUILD_SHARED=on") endif() # Use shared linking for unit tests if it's available diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 8cb3ec83f57db..5b89a831ff7fe 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2874,33 +2874,6 @@ endmacro() # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC -macro(ensure_absl) - if(NOT absl_FOUND) - if(${absl_SOURCE} STREQUAL "AUTO") - # We can't use resolve_dependency(absl 20211102) to use Abseil - # 20211102 or later because Abseil's CMake package uses "EXACT" - # version match strategy. Our CMake configuration will work with - # Abseil LTS 20211102 or later. So we want to accept Abseil LTS - # 20211102 or later. We need to update - # ARROW_ABSL_REQUIRED_LTS_VERSIONS list when new Abseil LTS is - # released. - set(ARROW_ABSL_REQUIRED_LTS_VERSIONS 20230125 20220623 20211102) - foreach(_VERSION ${ARROW_ABSL_REQUIRED_LTS_VERSIONS}) - find_package(absl ${_VERSION}) - if(absl_FOUND) - break() - endif() - endforeach() - # If we can't find Abseil LTS 20211102 or later, we use bundled - # Abseil. 
- if(NOT absl_FOUND) - set(absl_SOURCE "BUNDLED") - endif() - endif() - resolve_dependency(absl) - endif() -endmacro() - macro(build_absl) message(STATUS "Building Abseil-cpp from source") set(absl_FOUND TRUE) @@ -3845,7 +3818,6 @@ macro(build_grpc) TRUE PC_PACKAGE_NAMES libcares) - ensure_absl() message(STATUS "Building gRPC from source") @@ -4135,12 +4107,40 @@ macro(build_grpc) endif() endmacro() +if(ARROW_WITH_GOOGLE_CLOUD_CPP OR ARROW_WITH_GRPC) + set(ARROW_ABSL_REQUIRED_VERSION 20211102) + # Google Cloud C++ SDK and gRPC require Google Abseil + if(ARROW_WITH_GOOGLE_CLOUD_CPP) + set(ARROW_ABSL_CMAKE_PACKAGE_NAME Arrow) + set(ARROW_ABSL_PC_PACKAGE_NAME arrow) + else() + set(ARROW_ABSL_CMAKE_PACKAGE_NAME ArrowFlight) + set(ARROW_ABSL_PC_PACKAGE_NAME arrow-flight) + endif() + resolve_dependency(absl + ARROW_CMAKE_PACKAGE_NAME + ${ARROW_ABSL_CMAKE_PACKAGE_NAME} + ARROW_PC_PACKAGE_NAME + ${ARROW_ABSL_PC_PACKAGE_NAME} + HAVE_ALT + FALSE + FORCE_ANY_NEWER_VERSION + TRUE + REQUIRED_VERSION + ${ARROW_ABSL_REQUIRED_VERSION}) +endif() + if(ARROW_WITH_GRPC) if(NOT ARROW_ENABLE_THREADING) message(FATAL_ERROR "Can't use gRPC with ARROW_ENABLE_THREADING=OFF") endif() set(ARROW_GRPC_REQUIRED_VERSION "1.30.0") + if(absl_SOURCE STREQUAL "BUNDLED" AND NOT gRPC_SOURCE STREQUAL "BUNDLED") + # System gRPC can't be used with bundled Abseil + message(STATUS "Forcing gRPC_SOURCE to BUNDLED because absl_SOURCE is BUNDLED") + set(gRPC_SOURCE "BUNDLED") + endif() if(NOT Protobuf_SOURCE STREQUAL gRPC_SOURCE) # ARROW-15495: Protobuf/gRPC must come from the same source message(STATUS "Forcing gRPC_SOURCE to Protobuf_SOURCE (${Protobuf_SOURCE})") @@ -4259,7 +4259,6 @@ macro(build_google_cloud_cpp_storage) message(STATUS "Only building the google-cloud-cpp::storage component") # List of dependencies taken from https://github.com/googleapis/google-cloud-cpp/blob/main/doc/packaging.md - ensure_absl() build_crc32c_once() # Curl is required on all platforms, but building it internally might also trip over S3's copy. From 63cef36fa9125cd0305e4004bf375c8e6c302245 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 12 Jul 2024 15:22:29 -0400 Subject: [PATCH 057/122] MINOR: [Docs][Python] Add missing decimal256 docs entries (#43225) --- docs/source/python/api/datatypes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 62bf4b7723558..7edb4e161541d 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -58,6 +58,7 @@ These should be used to create Arrow data types and schemas. binary_view string_view decimal128 + decimal256 list_ large_list list_view @@ -101,6 +102,7 @@ functions above. Time64Type FixedSizeBinaryType Decimal128Type + Decimal256Type Field Schema RunEndEncodedType From bafb2d86c4c6f67d894a043648c00914ba42e904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 12 Jul 2024 21:30:53 +0200 Subject: [PATCH 058/122] GH-43232: [Release][Packaging][Python] Add tzdata as conda env requirement to avoid ORC failure (#43233) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Binary verifications for wheels on conda are failing on ORC test due to missing tzdata ### What changes are included in this PR? Adding tzdata as conda requirement when setting up the environment on the verification script ### Are these changes tested? Those changes have been tested locally ### Are there any user-facing changes? 
No * GitHub Issue: #43232 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index fcaaa423a4c75..2f4b203f217af 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -1153,7 +1153,7 @@ test_linux_wheels() { local pyver=${python/m} for platform in ${platform_tags}; do show_header "Testing Python ${pyver} wheel for platform ${platform}" - CONDA_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda + CONDA_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_conda tzdata if ! VENV_ENV=wheel-${pyver}-${platform} PYTHON_VERSION=${pyver} maybe_setup_virtualenv; then continue fi From 2d4c80b3544b4d8bc8633058d130bafe5c93ba2a Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sat, 13 Jul 2024 05:33:21 +0900 Subject: [PATCH 059/122] GH-43114: [Archery][Dev] Support setuptools-scm >= 8.0.0 (#43156) ### Rationale for this change The update to `setuptools-scm` version 8.0.0 or higher ensures compatibility for Archery. ### What changes are included in this PR? - Added `Configuration` objects to handle necessary parameters (`get_describe_command`) - Fixed the `parse_git_version` function to align with the new version of `setuptools_scm` - Updated `setuptools_scm` requirement to version 8.0.0 or higher ### Are these changes tested? Yes, tested by CI. ### Are there any user-facing changes? No. * GitHub Issue: #43114 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- dev/archery/archery/crossbow/core.py | 9 ++++++++- dev/archery/setup.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 0b5d242bbaccf..4e6b42e485c0c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -746,12 +746,19 @@ def get_version(root, **kwargs): subprojects, e.g. apache-arrow-js-XXX tags. 
""" from setuptools_scm.git import parse as parse_git_version + from setuptools_scm import Configuration # query the calculated version based on the git tags kwargs['describe_command'] = ( 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' ) - version = parse_git_version(root, **kwargs) + + # Create a Configuration object with necessary parameters + config = Configuration( + git_describe_command=kwargs['describe_command'] + ) + + version = parse_git_version(root, config=config, **kwargs) tag = str(version.tag) # We may get a development tag for the next version, such as "5.0.0.dev0", diff --git a/dev/archery/setup.py b/dev/archery/setup.py index cd3e2e9ca0834..f87316dcc7ab9 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -34,7 +34,7 @@ extras = { 'benchmark': ['pandas'], 'crossbow': ['github3.py', jinja_req, 'pygit2>=1.14.0', 'requests', - 'ruamel.yaml', 'setuptools_scm<8.0.0'], + 'ruamel.yaml', 'setuptools_scm>=8.0.0'], 'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml', 'setuptools_scm'], 'docker': ['ruamel.yaml', 'python-dotenv'], From 718415030f8ebcad14bc3fa10906beca6526c5e9 Mon Sep 17 00:00:00 2001 From: ben-freist <93315290+ben-freist@users.noreply.github.com> Date: Fri, 12 Jul 2024 22:40:04 +0200 Subject: [PATCH 060/122] GH-43095: [C++] Update bundled vendor/datetime to support for building with libc++ and C++20 (#43094) ### Rationale for this change We can't build with libc++ and C++20: CMake command line: ```bash cmake -DARROW_ENABLE_THREADING=OFF \ -DARROW_JEMALLOC=OFF \ -DCMAKE_CXX_STANDARD=20 \ -DCXX_ONLY_FLAGS="-stdlib=libc++" \ -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake --preset ninja-debug-minimal ../cpp/ ``` Error log: ``` In file included from ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/tz.cpp:90: ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/tz_private.h:295:12: error: use of overloaded operator '<<' is ambiguous (with operand types 'std::ostream' (aka 'basic_ostream') and 'const sys_seconds' (aka 'const time_point>>')) 295 | os << t.timepoint << "Z "; | ~~ ^ ~~~~~~~~~~~ /usr/lib/llvm-17/bin/../include/c++/v1/__chrono/ostream.h:46:1: note: candidate function [with _CharT = char, _Traits = std::char_traits, _Duration = std::chrono::duration] 46 | operator<<(basic_ostream<_CharT, _Traits>& __os, const sys_time<_Duration> __tp) { | ^ ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/date.h:4214:1: note: candidate function [with CharT = char, Traits = std::char_traits, Duration = std::chrono::duration] 4214 | operator<<(std::basic_ostream& os, const sys_time& tp) ``` ### What changes are included in this PR? Update the bundled vendor/datetime because the upstream has changes for this case: https://github.com/HowardHinnant/date/pull/827 ### Are these changes tested? ### Are there any user-facing changes? 
* GitHub Issue: #43095 Authored-by: Benjamin Freist Signed-off-by: Sutou Kouhei --- cpp/src/arrow/vendored/datetime/README.md | 2 +- cpp/src/arrow/vendored/datetime/date.h | 27 ++++-- cpp/src/arrow/vendored/datetime/tz.cpp | 95 +++++++++++++++++--- cpp/src/arrow/vendored/datetime/tz.h | 4 +- cpp/src/arrow/vendored/datetime/tz_private.h | 3 +- 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index 0dd663c5e5acc..5a0993b7b4336 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -17,7 +17,7 @@ copies or substantial portions of the Software. Sources for datetime are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date). -Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93 +Sources are taken from changeset 1ead6715dec030d340a316c927c877a3c4e5a00c of the above project. The following changes are made: diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index fd2569c6de0f6..75e2624296672 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -4230,7 +4230,7 @@ inline std::basic_ostream& operator<<(std::basic_ostream& os, const local_time& ut) { - return (os << sys_time{ut.time_since_epoch()}); + return (date::operator<<(os, sys_time{ut.time_since_epoch()})); } namespace detail @@ -6353,7 +6353,10 @@ read_signed(std::basic_istream& is, unsigned m = 1, unsigned M = if (('0' <= c && c <= '9') || c == '-' || c == '+') { if (c == '-' || c == '+') + { (void)is.get(); + --M; + } auto x = static_cast(read_unsigned(is, std::max(m, 1u), M)); if (!is.fail()) { @@ -6526,7 +6529,14 @@ read(std::basic_istream& is, int a0, Args&& ...args) *e++ = static_cast(CharT(u % 10) + CharT{'0'}); u /= 10; } while (u > 0); +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif std::reverse(buf, e); +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic pop +#endif for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) read(is, *p); } @@ -6592,7 +6602,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, CONSTDATA int not_a_year = numeric_limits::min(); CONSTDATA int not_a_2digit_year = 100; - CONSTDATA int not_a_century = not_a_year / 100; + CONSTDATA int not_a_century = numeric_limits::min(); CONSTDATA int not_a_month = 0; CONSTDATA int not_a_day = 0; CONSTDATA int not_a_hour = numeric_limits::min(); @@ -7519,7 +7529,12 @@ from_stream(std::basic_istream& is, const CharT* fmt, { auto c = static_cast(Traits::to_char_type(ic)); if (c == '-') + { neg = true; + (void)is.get(); + } + else if (c == '+') + (void)is.get(); } if (modified == CharT{}) { @@ -7735,9 +7750,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) + weeks{U-1} + (weekday{static_cast(wd)} - Sunday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) + if (year{Y} != ymd_trial.year()) goto broken; if (m == not_a_month) m = static_cast(static_cast(ymd_trial.month())); @@ -7754,9 +7767,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) + weeks{W-1} + (weekday{static_cast(wd)} - Monday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) + 
if (year{Y} != ymd_trial.year()) goto broken; if (m == not_a_month) m = static_cast(static_cast(ymd_trial.month())); diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index e94c1bc8ae682..44c627775f3d7 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -96,6 +96,10 @@ # define TARGET_OS_SIMULATOR 0 #endif +#if defined(ANDROID) || defined(__ANDROID__) +#include +#endif + #if USE_OS_TZDB # include #endif @@ -2709,7 +2713,8 @@ operator<<(std::ostream& os, const time_zone& z) os.width(8); os << s.format_ << " "; os << s.until_year_ << ' ' << s.until_date_; - os << " " << s.until_utc_ << " UTC"; + os << " "; + date::operator<<(os, s.until_utc_) << " UTC"; os << " " << s.until_std_ << " STD"; os << " " << s.until_loc_; os << " " << make_time(s.initial_save_); @@ -2734,8 +2739,7 @@ operator<<(std::ostream& os, const time_zone& z) std::ostream& operator<<(std::ostream& os, const leap_second& x) { - using namespace date; - return os << x.date_ << " +"; + return date::operator<<(os, x.date_) << " +"; } #if USE_OS_TZDB @@ -3716,6 +3720,67 @@ get_tzdb() return get_tzdb_list().front(); } +namespace { + +class recursion_limiter +{ + unsigned depth_ = 0; + unsigned limit_; + + class restore_recursion_depth; + +public: + recursion_limiter(recursion_limiter const&) = delete; + recursion_limiter& operator=(recursion_limiter const&) = delete; + + explicit constexpr recursion_limiter(unsigned limit) noexcept; + + restore_recursion_depth count(); +}; + +class recursion_limiter::restore_recursion_depth +{ + recursion_limiter* rc_; + +public: + ~restore_recursion_depth(); + restore_recursion_depth(restore_recursion_depth&&) = default; + + explicit restore_recursion_depth(recursion_limiter* rc) noexcept; +}; + +inline +recursion_limiter::restore_recursion_depth::~restore_recursion_depth() +{ + --(rc_->depth_); +} + +inline +recursion_limiter::restore_recursion_depth::restore_recursion_depth(recursion_limiter* rc) + noexcept + : rc_{rc} +{} + +inline +constexpr +recursion_limiter::recursion_limiter(unsigned limit) noexcept + : limit_{limit} +{ +} + +inline +recursion_limiter::restore_recursion_depth +recursion_limiter::count() +{ + ++depth_; + if (depth_ > limit_) + throw std::runtime_error("recursion limit of " + + std::to_string(limit_) + " exceeded"); + return restore_recursion_depth{this}; +} + +} // unnamed namespace + const time_zone* #if HAS_STRING_VIEW tzdb::locate_zone(std::string_view tz_name) const @@ -3723,6 +3788,10 @@ tzdb::locate_zone(std::string_view tz_name) const tzdb::locate_zone(const std::string& tz_name) const #endif { + // If a link-to-link chain exceeds this limit, give up + thread_local recursion_limiter rc{10}; + auto restore_count = rc.count(); + auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name, #if HAS_STRING_VIEW [](const time_zone& z, const std::string_view& nm) @@ -3746,13 +3815,7 @@ tzdb::locate_zone(const std::string& tz_name) const }); if (li != links.end() && li->name() == tz_name) { - zi = std::lower_bound(zones.begin(), zones.end(), li->target(), - [](const time_zone& z, const std::string& nm) - { - return z.name() < nm; - }); - if (zi != zones.end() && zi->name() == li->target()) - return &*zi; + return locate_zone(li->target()); } #endif // !USE_OS_TZDB throw std::runtime_error(std::string(tz_name) + " not found in timezone database"); @@ -4038,6 +4101,18 @@ tzdb::current_zone() const if (!result.empty()) return locate_zone(result); #endif + // Fall through to try other 
means. + } + { + // On Android, it is not possible to use file based approach either, + // we have to ask the value of `persist.sys.timezone` system property +#if defined(ANDROID) || defined(__ANDROID__) + char sys_timezone[PROP_VALUE_MAX]; + if (__system_property_get("persist.sys.timezone", sys_timezone) > 0) + { + return locate_zone(sys_timezone); + } +#endif // defined(ANDROID) || defined(__ANDROID__) // Fall through to try other means. } { diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index 467db6d199793..df6d1a851ac9d 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -239,8 +239,8 @@ nonexistent_local_time::make_msg(local_time tp, const local_info& i) << i.first.abbrev << " and\n" << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' ' << i.second.abbrev - << " which are both equivalent to\n" - << i.first.end << " UTC"; + << " which are both equivalent to\n"; + date::operator<<(os, i.first.end) << " UTC"; return os.str(); } diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h index 6b7a91493e103..a6bb8fd30a0c7 100644 --- a/cpp/src/arrow/vendored/datetime/tz_private.h +++ b/cpp/src/arrow/vendored/datetime/tz_private.h @@ -291,8 +291,7 @@ struct transition std::ostream& operator<<(std::ostream& os, const transition& t) { - using date::operator<<; - os << t.timepoint << "Z "; + date::operator<<(os, t.timepoint) << "Z "; if (t.info->offset >= std::chrono::seconds{0}) os << '+'; os << make_time(t.info->offset); From 03726178494c8978bf48b9bab15ed9676e7c9196 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 14 Jul 2024 08:46:33 -0500 Subject: [PATCH 061/122] GH-43194: [R] R_existsVarInFrame isn't available earlier than R 4.2 (#43243) ### Rationale for this change `R_existsVarInFrame` doesn't exist before R 4.2, so we need to fall back to `Rf_findVarInFrame3` if it is not defined. Resolves #43194 ### What changes are included in this PR? `ifdef`s ### Are these changes tested? Yes, in our extended CI `test-r-versions`, `test-r-rstudio-r-base-4.1-opensuse155` ### Are there any user-facing changes? No * GitHub Issue: #43194 Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- r/src/arrow_cpp11.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index 5e6a7d5a42fb2..b2ed66b83c3d1 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -378,9 +378,17 @@ SEXP to_r6(const std::shared_ptr& ptr, const char* r6_class_name) { cpp11::external_pointer> xp(new std::shared_ptr(ptr)); SEXP r6_class = Rf_install(r6_class_name); +// R_existsVarInFrame doesn't exist before R 4.2, so we need to fall back to +// Rf_findVarInFrame3 if it is not defined. 
+#ifdef R_existsVarInFrame
   if (!R_existsVarInFrame(arrow::r::ns::arrow, r6_class)) {
     cpp11::stop("No arrow R6 class named '%s'", r6_class_name);
   }
+#else
+  if (Rf_findVarInFrame3(arrow::r::ns::arrow, r6_class, FALSE) == R_UnboundValue) {
+    cpp11::stop("No arrow R6 class named '%s'", r6_class_name);
+  }
+#endif

   // make call: $new()
   SEXP call = PROTECT(Rf_lang3(R_DollarSymbol, r6_class, arrow::r::symbols::new_));

From 4e939f522985811ec0bc49238db1adea3a5153b1 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Mon, 15 Jul 2024 15:34:51 +0900
Subject: [PATCH 062/122] GH-43245: [Packaging][deb] Add missing libabsl-dev dependency (#43246)

### Rationale for this change

If the `libabsl-dev` dependency is missing from `libarrow-dev`, `find_package(Arrow)` fails.

### What changes are included in this PR?

* Add missing `libabsl-dev` dependency.
* Remove `mold` from debian-trixie because `mold` has been removed from debian-trixie: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1073168

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.
* GitHub Issue: #43245

Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
 .../linux-packages/apache-arrow/apt/debian-trixie/Dockerfile | 1 -
 dev/tasks/linux-packages/apache-arrow/debian/control.in | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile
index 3126c6d3cded0..8a6accbfc8b16 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile
@@ -70,7 +70,6 @@ RUN \
   llvm-dev \
   lsb-release \
   meson \
-  mold \
   ninja-build \
   nlohmann-json3-dev \
   pkg-config \
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index c33e3ac791be1..38397e7ef6bd9 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -140,6 +140,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow1700 (= ${binary:Version}),
+@USE_SYSTEM_GRPC@  libabsl-dev,
   libbrotli-dev,
   libbz2-dev,
   libcurl4-openssl-dev,

From 57ac40ca23ebcaa4d42ae808137033689d3be9b1 Mon Sep 17 00:00:00 2001
From: Hyunseok Seo
Date: Mon, 15 Jul 2024 15:35:33 +0900
Subject: [PATCH 063/122] GH-43164: [C++] Fix CMake link order for AWS SDK (#43230)

### Rationale for this change

Resolve conflicts with AWS SDK for C++ static variables when linking with libarrow, by ensuring the correct link order.

### What changes are included in this PR?

- Adjusted `CMakeLists.txt` to set `ARROW_S3_TEST_EXTRA_LINK_LIBS`.
- Ensured `libarrow` is linked before `libaws*` libraries.
- Updated `s3fs_test` configuration to use the new link order.

### Are these changes tested?

I ran the test locally and observed the same result as mentioned. Additionally, I confirmed that if `ARROW_S3` is set to OFF or if the configuration includes `exclude_tests=arrow-s3fs-test`, the test is excluded.

### Are there any user-facing changes?

No.
* GitHub Issue: #43164 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/CMakeLists.txt | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 0a31a64b7a3a4..dec4bb6e3d465 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -63,6 +63,23 @@ if(ARROW_AZURE) endif() if(ARROW_S3) + set(ARROW_S3_TEST_EXTRA_LINK_LIBS) + # arrow_shared/arrow_static is specified implicitly via + # arrow_testing_shared/arrow_testing_static but we specify + # arrow_shared/arrow_static explicitly here to ensure using libarrow + # before libaws* on link. If we use libaws*.a before libarrow, + # static variables storage of AWS SDK for C++ in libaws*.a may be + # mixed with one in libarrow. + if(ARROW_TEST_LINKAGE STREQUAL "shared") + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_shared) + else() + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static) + endif() + list(APPEND + ARROW_S3_TEST_EXTRA_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} + Boost::filesystem + Boost::system) add_arrow_test(s3fs_test SOURCES s3fs_test.cc @@ -70,18 +87,17 @@ if(ARROW_S3) EXTRA_LABELS filesystem EXTRA_LINK_LIBS - ${AWSSDK_LINK_LIBRARIES} - Boost::filesystem - Boost::system) + ${ARROW_S3_TEST_EXTRA_LINK_LIBS}) if(TARGET arrow-s3fs-test) set(ARROW_S3FS_TEST_COMPILE_DEFINITIONS) get_target_property(AWS_CPP_SDK_S3_TYPE aws-cpp-sdk-s3 TYPE) - # We need to initialize AWS C++ SDK for direct use (not via + # We need to initialize AWS SDK for C++ for direct use (not via # arrow::fs::S3FileSystem) in arrow-s3fs-test if we use static AWS - # C++ SDK and hide symbols of them. Because AWS C++ SDK has - # internal static variables that aren't shared in libarrow and + # SDK for C++ and hide symbols of them. Because AWS SDK for C++ + # has internal static variables that aren't shared in libarrow and # arrow-s3fs-test. It means that arrow::fs::InitializeS3() doesn't - # initialize AWS C++ SDK that is directly used in arrow-s3fs-test. + # initialize AWS SDK for C++ that is directly used in + # arrow-s3fs-test. if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" AND CXX_LINKER_SUPPORTS_VERSION_SCRIPT) list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_PRIVATE_STATIC") From d21761e3354821cc3fbeda81e51b83a33bc5c950 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 15 Jul 2024 16:22:06 +0200 Subject: [PATCH 064/122] GH-43211: [C++] Fix decimal benchmarks to avoid out-of-bounds accesses (#43212) ### Rationale for this change Some of the decimal benchmarks access their benchmark data in strides, without checking that the accesses fall within bounds. A side effect is that this will break benchmark history because the iterations/s calculation was wrong, even though actual performance is unchanged. ### Are these changes tested? By the continuous benchmarking jobs. ### Are there any user-facing changes? No. 
* GitHub Issue: #43211 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/util/decimal_benchmark.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/decimal_benchmark.cc b/cpp/src/arrow/util/decimal_benchmark.cc index d505532d71da1..fd77f451d3a05 100644 --- a/cpp/src/arrow/util/decimal_benchmark.cc +++ b/cpp/src/arrow/util/decimal_benchmark.cc @@ -77,7 +77,7 @@ static void ToString(benchmark::State& state) { // NOLINT non-const reference state.SetItemsProcessed(state.iterations() * values.size()); } -constexpr int32_t kValueSize = 10; +constexpr int32_t kValueSize = 12; static void BinaryCompareOp(benchmark::State& state) { // NOLINT non-const reference std::vector v1, v2; @@ -85,6 +85,8 @@ static void BinaryCompareOp(benchmark::State& state) { // NOLINT non-const refe v1.emplace_back(100 + x, 100 + x); v2.emplace_back(200 + x, 200 + x); } + static_assert(kValueSize % 4 == 0, + "kValueSize needs to be a multiple of 4 to avoid out-of-bounds accesses"); for (auto _ : state) { for (int x = 0; x < kValueSize; x += 4) { auto equal = v1[x] == v2[x]; @@ -93,7 +95,7 @@ static void BinaryCompareOp(benchmark::State& state) { // NOLINT non-const refe benchmark::DoNotOptimize(less_than_or_equal); auto greater_than_or_equal1 = v1[x + 2] >= v2[x + 2]; benchmark::DoNotOptimize(greater_than_or_equal1); - auto greater_than_or_equal2 = v1[x + 3] >= v1[x + 3]; + auto greater_than_or_equal2 = v1[x + 3] >= v2[x + 3]; benchmark::DoNotOptimize(greater_than_or_equal2); } } @@ -106,6 +108,8 @@ static void BinaryCompareOpConstant( for (int x = 0; x < kValueSize; x++) { v1.emplace_back(100 + x, 100 + x); } + static_assert(kValueSize % 4 == 0, + "kValueSize needs to be a multiple of 4 to avoid out-of-bounds accesses"); BasicDecimal128 constant(313, 212); for (auto _ : state) { for (int x = 0; x < kValueSize; x += 4) { @@ -245,6 +249,8 @@ static void UnaryOp(benchmark::State& state) { // NOLINT non-const reference v.emplace_back(100 + x, 100 + x); } + static_assert(kValueSize % 2 == 0, + "kValueSize needs to be a multiple of 2 to avoid out-of-bounds accesses"); for (auto _ : state) { for (int x = 0; x < kValueSize; x += 2) { auto abs = v[x].Abs(); @@ -274,6 +280,8 @@ static void BinaryBitOp(benchmark::State& state) { // NOLINT non-const referenc v2.emplace_back(200 + x, 200 + x); } + static_assert(kValueSize % 2 == 0, + "kValueSize needs to be a multiple of 2 to avoid out-of-bounds accesses"); for (auto _ : state) { for (int x = 0; x < kValueSize; x += 2) { benchmark::DoNotOptimize(v1[x] |= v2[x]); From de61a2057ccfbf194754d74f8e0b31d8f5068ccf Mon Sep 17 00:00:00 2001 From: Alex Malins Date: Tue, 16 Jul 2024 00:06:12 +0900 Subject: [PATCH 065/122] MINOR: [Docs] Fix nNull typo in pyarrow.compute.sort_indices docstring (#43251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Minor typo fix for the [sort_indices docstring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.sort_indices.html) ### What changes are included in this PR? `"nNull"` typo to `"null"` ### Are these changes tested? Untested, just a single char change. ### Are there any user-facing changes? 
Yes, affects https://arrow.apache.org/docs/python/generated/pyarrow.compute.sort_indices.html

Authored-by: Alex Malins
Signed-off-by: Raúl Cumplido
---
 cpp/src/arrow/compute/kernels/vector_sort.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index ad22fa8d365c4..8766ca3baac96 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -870,7 +870,7 @@ const SortOptions* GetDefaultSortOptions() {
 const FunctionDoc sort_indices_doc(
     "Return the indices that would sort an array, record batch or table",
     ("This function computes an array of indices that define a stable sort\n"
-     "of the input array, record batch or table. By default, nNull values are\n"
+     "of the input array, record batch or table. By default, null values are\n"
     "considered greater than any other value and are therefore sorted at the\n"
     "end of the input. For floating-point types, NaNs are considered greater\n"
     "than any other non-null value, but smaller than null values.\n"

From 338067822fffce2529727c85b9f4e989dcce23ef Mon Sep 17 00:00:00 2001
From: Dewey Dunnington
Date: Mon, 15 Jul 2024 12:06:30 -0300
Subject: [PATCH 066/122] MINOR: [CI] Remove paleolimbot from codeowners (#43253)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is part of a larger effort in reconfiguring my personal GitHub notification strategy. I'm still happy to be pinged on any R things as required!

### What changes are included in this PR?

Remove myself from the CODEOWNERS file

### Are these changes tested?

Not needed!

### Are there any user-facing changes?

Nope!

Authored-by: Dewey Dunnington
Signed-off-by: Raúl Cumplido
---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index e495bfd147de6..03252657feaf1 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -40,7 +40,7 @@
 /matlab/ @kevingurney @kou @sgilmore10
 /python/pyarrow/_flight.pyx @lidavidm
 /python/pyarrow/**/*gandiva* @wjones127
-/r/ @paleolimbot @thisisnic
+/r/ @thisisnic
 /ruby/ @kou
 /swift/ @kou

From 005125790d83ecf50ecc20026d97021d7eb19fe2 Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho
Date: Mon, 15 Jul 2024 12:08:50 -0300
Subject: [PATCH 067/122] GH-43238: [C++][FlightRPC] Reduce repetition in flight/types.cc in serde functions (#43237)

### Rationale for this change

Local templates can be used to reduce repetition in serde code.

### What changes are included in this PR?

- `Deserialize` and `SerializeToString` on the types in `flight/types.cc` are now defined in terms of a few templates
- Use of `ParseFromArray` to parse `std::string_view` inputs instead of wrapping them in a `google::protobuf::io::ArrayInputStream`

### Are these changes tested?

By existing tests.
* GitHub Issue: #43238 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/flight/types.cc | 514 ++++++++-------------------------- 1 file changed, 116 insertions(+), 398 deletions(-) diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index 39b59f65d9cfb..a04956a4ea3f7 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -41,6 +41,63 @@ namespace arrow { namespace flight { +namespace { + +ARROW_NOINLINE +Status ProtoStringInputTooBig(const char* name) { + return Status::Invalid("Serialized ", name, " size should not exceed 2 GiB"); +} + +ARROW_NOINLINE +Status ProtoStringOutputTooBig(const char* name) { + return Status::Invalid("Serialized ", name, " exceeded 2 GiB limit"); +} + +ARROW_NOINLINE +Status InvalidProtoString(const char* name) { + return Status::Invalid("Not a valid ", name); +} + +// Status-returning ser/de functions that allow reuse of the same output objects + +template +Status ParseFromString(const char* name, std::string_view serialized, PBType* out) { + if (serialized.size() > static_cast(std::numeric_limits::max())) { + return ProtoStringInputTooBig(name); + } + if (!out->ParseFromArray(serialized.data(), static_cast(serialized.size()))) { + return InvalidProtoString(name); + } + return Status::OK(); +} + +template +Status SerializeToString(const char* name, const T& in, PBType* out_pb, + std::string* out) { + RETURN_NOT_OK(internal::ToProto(in, out_pb)); + return out_pb->SerializeToString(out) ? Status::OK() : ProtoStringOutputTooBig(name); +} + +// Result-returning ser/de functions (more convenient) + +template +arrow::Result DeserializeProtoString(const char* name, std::string_view serialized) { + PBType pb; + RETURN_NOT_OK(ParseFromString(name, serialized, &pb)); + T out; + RETURN_NOT_OK(internal::FromProto(pb, &out)); + return out; +} + +template +arrow::Result SerializeToProtoString(const char* name, const T& in) { + PBType pb; + std::string out; + RETURN_NOT_OK(SerializeToString(name, in, &pb, &out)); + return out; +} + +} // namespace const char* kSchemeGrpc = "grpc"; const char* kSchemeGrpcTcp = "grpc+tcp"; @@ -174,54 +231,23 @@ bool SchemaResult::Equals(const SchemaResult& other) const { } arrow::Result SchemaResult::SerializeToString() const { - pb::SchemaResult pb_schema_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_schema_result)); - - std::string out; - if (!pb_schema_result.SerializeToString(&out)) { - return Status::IOError("Serialized SchemaResult exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("SchemaResult", *this); } arrow::Result SchemaResult::Deserialize(std::string_view serialized) { pb::SchemaResult pb_schema_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized SchemaResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_schema_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid SchemaResult"); - } + RETURN_NOT_OK(ParseFromString("SchemaResult", serialized, &pb_schema_result)); return SchemaResult{pb_schema_result.schema()}; } arrow::Result FlightDescriptor::SerializeToString() const { - pb::FlightDescriptor pb_descriptor; - RETURN_NOT_OK(internal::ToProto(*this, &pb_descriptor)); - - std::string out; - if (!pb_descriptor.SerializeToString(&out)) { - return Status::IOError("Serialized FlightDescriptor exceeded 2 GiB limit"); - } - 
return out; + return SerializeToProtoString("FlightDescriptor", *this); } arrow::Result FlightDescriptor::Deserialize( std::string_view serialized) { - pb::FlightDescriptor pb_descriptor; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized FlightDescriptor size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_descriptor.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid FlightDescriptor"); - } - FlightDescriptor out; - RETURN_NOT_OK(internal::FromProto(pb_descriptor, &out)); - return out; + return DeserializeProtoString( + "FlightDescriptor", serialized); } std::string Ticket::ToString() const { @@ -233,29 +259,11 @@ std::string Ticket::ToString() const { bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; } arrow::Result Ticket::SerializeToString() const { - pb::Ticket pb_ticket; - RETURN_NOT_OK(internal::ToProto(*this, &pb_ticket)); - - std::string out; - if (!pb_ticket.SerializeToString(&out)) { - return Status::IOError("Serialized Ticket exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("Ticket", *this); } arrow::Result Ticket::Deserialize(std::string_view serialized) { - pb::Ticket pb_ticket; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Ticket size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_ticket.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Ticket"); - } - Ticket out; - RETURN_NOT_OK(internal::FromProto(pb_ticket, &out)); - return out; + return DeserializeProtoString("Ticket", serialized); } arrow::Result FlightInfo::Make(const Schema& schema, @@ -287,27 +295,13 @@ arrow::Result> FlightInfo::GetSchema( } arrow::Result FlightInfo::SerializeToString() const { - pb::FlightInfo pb_info; - RETURN_NOT_OK(internal::ToProto(*this, &pb_info)); - - std::string out; - if (!pb_info.SerializeToString(&out)) { - return Status::IOError("Serialized FlightInfo exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("FlightInfo", *this); } arrow::Result> FlightInfo::Deserialize( std::string_view serialized) { pb::FlightInfo pb_info; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized FlightInfo size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_info.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid FlightInfo"); - } + RETURN_NOT_OK(ParseFromString("FlightInfo", serialized, &pb_info)); ARROW_ASSIGN_OR_RAISE(FlightInfo info, internal::FromProto(pb_info)); return std::make_unique(std::move(info)); } @@ -347,27 +341,13 @@ bool FlightInfo::Equals(const FlightInfo& other) const { } arrow::Result PollInfo::SerializeToString() const { - pb::PollInfo pb_info; - RETURN_NOT_OK(internal::ToProto(*this, &pb_info)); - - std::string out; - if (!pb_info.SerializeToString(&out)) { - return Status::IOError("Serialized PollInfo exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("PollInfo", *this); } arrow::Result> PollInfo::Deserialize( std::string_view serialized) { pb::PollInfo pb_info; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized PollInfo size 
should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_info.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid PollInfo"); - } + RETURN_NOT_OK(ParseFromString("PollInfo", serialized, &pb_info)); PollInfo info; RETURN_NOT_OK(internal::FromProto(pb_info, &info)); return std::make_unique(std::move(info)); @@ -448,31 +428,14 @@ bool CancelFlightInfoRequest::Equals(const CancelFlightInfoRequest& other) const } arrow::Result CancelFlightInfoRequest::SerializeToString() const { - pb::CancelFlightInfoRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); - - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized CancelFlightInfoRequest exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("CancelFlightInfoRequest", + *this); } arrow::Result CancelFlightInfoRequest::Deserialize( std::string_view serialized) { - pb::CancelFlightInfoRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized CancelFlightInfoRequest size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CancelFlightInfoRequest"); - } - CancelFlightInfoRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return DeserializeProtoString( + "CancelFlightInfoRequest", serialized); } static const char* const SetSessionOptionStatusNames[] = {"Unspecified", "InvalidName", @@ -594,32 +557,14 @@ bool SetSessionOptionsRequest::Equals(const SetSessionOptionsRequest& other) con } arrow::Result SetSessionOptionsRequest::SerializeToString() const { - pb::SetSessionOptionsRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); - - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized SetSessionOptionsRequest exceeded 2GiB limit"); - } - return out; + return SerializeToProtoString("SetSessionOptionsRequest", + *this); } arrow::Result SetSessionOptionsRequest::Deserialize( std::string_view serialized) { - // TODO these & SerializeToString should all be factored out to a superclass - pb::SetSessionOptionsRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized SetSessionOptionsRequest size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid SetSessionOptionsRequest"); - } - SetSessionOptionsRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return DeserializeProtoString( + "SetSessionOptionsRequest", serialized); } // SetSessionOptionsResult @@ -640,31 +585,14 @@ bool SetSessionOptionsResult::Equals(const SetSessionOptionsResult& other) const } arrow::Result SetSessionOptionsResult::SerializeToString() const { - pb::SetSessionOptionsResult pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError("Serialized SetSessionOptionsResult exceeded 2GiB limit"); - } - return out; + return SerializeToProtoString("SetSessionOptionsResult", + *this); } arrow::Result 
SetSessionOptionsResult::Deserialize( std::string_view serialized) { - pb::SetSessionOptionsResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized SetSessionOptionsResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid SetSessionOptionsResult"); - } - SetSessionOptionsResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString( + "SetSessionOptionsResult", serialized); } // GetSessionOptionsRequest @@ -678,40 +606,21 @@ bool GetSessionOptionsRequest::Equals(const GetSessionOptionsRequest& other) con } arrow::Result GetSessionOptionsRequest::SerializeToString() const { - pb::GetSessionOptionsRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); - - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized GetSessionOptionsRequest exceeded 2GiB limit"); - } - return out; + return SerializeToProtoString("GetSessionOptionsRequest", + *this); } arrow::Result GetSessionOptionsRequest::Deserialize( std::string_view serialized) { - pb::GetSessionOptionsRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized GetSessionOptionsRequest size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid GetSessionOptionsRequest"); - } - GetSessionOptionsRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return DeserializeProtoString( + "GetSessionOptionsRequest", serialized); } // GetSessionOptionsResult std::string GetSessionOptionsResult::ToString() const { std::stringstream ss; - ss << "("GetSessionOptionsResult", + *this); } arrow::Result GetSessionOptionsResult::Deserialize( std::string_view serialized) { - pb::GetSessionOptionsResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized GetSessionOptionsResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid GetSessionOptionsResult"); - } - GetSessionOptionsResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString( + "GetSessionOptionsResult", serialized); } // CloseSessionRequest @@ -754,30 +646,13 @@ std::string CloseSessionRequest::ToString() const { return " CloseSessionRequest::SerializeToString() const { - pb::CloseSessionRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); - - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized CloseSessionRequest exceeded 2GiB limit"); - } - return out; + return SerializeToProtoString("CloseSessionRequest", *this); } arrow::Result CloseSessionRequest::Deserialize( std::string_view serialized) { - pb::CloseSessionRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized CloseSessionRequest size should not exceed 2 GiB"); - } - 
google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CloseSessionRequest"); - } - CloseSessionRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return DeserializeProtoString( + "CloseSessionRequest", serialized); } // CloseSessionResult @@ -795,30 +670,13 @@ bool CloseSessionResult::Equals(const CloseSessionResult& other) const { } arrow::Result CloseSessionResult::SerializeToString() const { - pb::CloseSessionResult pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError("Serialized CloseSessionResult exceeded 2GiB limit"); - } - return out; + return SerializeToProtoString("CloseSessionResult", *this); } arrow::Result CloseSessionResult::Deserialize( std::string_view serialized) { - pb::CloseSessionResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized CloseSessionResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CloseSessionResult"); - } - CloseSessionResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString( + "CloseSessionResult", serialized); } Location::Location() { uri_ = std::make_shared(); } @@ -924,29 +782,12 @@ bool FlightEndpoint::Equals(const FlightEndpoint& other) const { } arrow::Result FlightEndpoint::SerializeToString() const { - pb::FlightEndpoint pb_flight_endpoint; - RETURN_NOT_OK(internal::ToProto(*this, &pb_flight_endpoint)); - - std::string out; - if (!pb_flight_endpoint.SerializeToString(&out)) { - return Status::IOError("Serialized FlightEndpoint exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("FlightEndpoint", *this); } arrow::Result FlightEndpoint::Deserialize(std::string_view serialized) { - pb::FlightEndpoint pb_flight_endpoint; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized FlightEndpoint size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_flight_endpoint.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid FlightEndpoint"); - } - FlightEndpoint out; - RETURN_NOT_OK(internal::FromProto(pb_flight_endpoint, &out)); - return out; + return DeserializeProtoString("FlightEndpoint", + serialized); } std::string RenewFlightEndpointRequest::ToString() const { @@ -960,31 +801,15 @@ bool RenewFlightEndpointRequest::Equals(const RenewFlightEndpointRequest& other) } arrow::Result RenewFlightEndpointRequest::SerializeToString() const { - pb::RenewFlightEndpointRequest pb_request; - RETURN_NOT_OK(internal::ToProto(*this, &pb_request)); - - std::string out; - if (!pb_request.SerializeToString(&out)) { - return Status::IOError("Serialized RenewFlightEndpointRequest exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString( + "RenewFlightEndpointRequest", *this); } arrow::Result RenewFlightEndpointRequest::Deserialize( std::string_view serialized) { - pb::RenewFlightEndpointRequest pb_request; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - 
"Serialized RenewFlightEndpointRequest size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_request.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid RenewFlightEndpointRequest"); - } - RenewFlightEndpointRequest out; - RETURN_NOT_OK(internal::FromProto(pb_request, &out)); - return out; + return DeserializeProtoString("RenewFlightEndpointRequest", + serialized); } std::string ActionType::ToString() const { @@ -1023,29 +848,11 @@ bool ActionType::Equals(const ActionType& other) const { } arrow::Result ActionType::SerializeToString() const { - pb::ActionType pb_action_type; - RETURN_NOT_OK(internal::ToProto(*this, &pb_action_type)); - - std::string out; - if (!pb_action_type.SerializeToString(&out)) { - return Status::IOError("Serialized ActionType exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("ActionType", *this); } arrow::Result ActionType::Deserialize(std::string_view serialized) { - pb::ActionType pb_action_type; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized ActionType size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_action_type.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid ActionType"); - } - ActionType out; - RETURN_NOT_OK(internal::FromProto(pb_action_type, &out)); - return out; + return DeserializeProtoString("ActionType", serialized); } std::string Criteria::ToString() const { @@ -1057,29 +864,11 @@ bool Criteria::Equals(const Criteria& other) const { } arrow::Result Criteria::SerializeToString() const { - pb::Criteria pb_criteria; - RETURN_NOT_OK(internal::ToProto(*this, &pb_criteria)); - - std::string out; - if (!pb_criteria.SerializeToString(&out)) { - return Status::IOError("Serialized Criteria exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("Criteria", *this); } arrow::Result Criteria::Deserialize(std::string_view serialized) { - pb::Criteria pb_criteria; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Criteria size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_criteria.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Criteria"); - } - Criteria out; - RETURN_NOT_OK(internal::FromProto(pb_criteria, &out)); - return out; + return DeserializeProtoString("Criteria", serialized); } std::string Action::ToString() const { @@ -1101,29 +890,11 @@ bool Action::Equals(const Action& other) const { } arrow::Result Action::SerializeToString() const { - pb::Action pb_action; - RETURN_NOT_OK(internal::ToProto(*this, &pb_action)); - - std::string out; - if (!pb_action.SerializeToString(&out)) { - return Status::IOError("Serialized Action exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("Action", *this); } arrow::Result Action::Deserialize(std::string_view serialized) { - pb::Action pb_action; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Action size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_action.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Action"); - } - 
Action out; - RETURN_NOT_OK(internal::FromProto(pb_action, &out)); - return out; + return DeserializeProtoString("Action", serialized); } std::string Result::ToString() const { @@ -1142,29 +913,11 @@ bool Result::Equals(const Result& other) const { } arrow::Result Result::SerializeToString() const { - pb::Result pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError("Serialized Result exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("Result", *this); } arrow::Result Result::Deserialize(std::string_view serialized) { - pb::Result pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized Result size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid Result"); - } - Result out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString("Result", serialized); } std::string CancelFlightInfoResult::ToString() const { @@ -1178,32 +931,14 @@ bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const { } arrow::Result CancelFlightInfoResult::SerializeToString() const { - pb::CancelFlightInfoResult pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError( - "Serialized ActionCancelFlightInfoResult exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("CancelFlightInfoResult", + *this); } arrow::Result CancelFlightInfoResult::Deserialize( std::string_view serialized) { - pb::CancelFlightInfoResult pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid( - "Serialized ActionCancelFlightInfoResult size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid CancelFlightInfoResult"); - } - CancelFlightInfoResult out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString( + "CancelFlightInfoResult", serialized); } std::ostream& operator<<(std::ostream& os, CancelStatus status) { @@ -1321,28 +1056,11 @@ bool BasicAuth::Equals(const BasicAuth& other) const { } arrow::Result BasicAuth::Deserialize(std::string_view serialized) { - pb::BasicAuth pb_result; - if (serialized.size() > static_cast(std::numeric_limits::max())) { - return Status::Invalid("Serialized BasicAuth size should not exceed 2 GiB"); - } - google::protobuf::io::ArrayInputStream input(serialized.data(), - static_cast(serialized.size())); - if (!pb_result.ParseFromZeroCopyStream(&input)) { - return Status::Invalid("Not a valid BasicAuth"); - } - BasicAuth out; - RETURN_NOT_OK(internal::FromProto(pb_result, &out)); - return out; + return DeserializeProtoString("BasicAuth", serialized); } arrow::Result BasicAuth::SerializeToString() const { - pb::BasicAuth pb_result; - RETURN_NOT_OK(internal::ToProto(*this, &pb_result)); - std::string out; - if (!pb_result.SerializeToString(&out)) { - return Status::IOError("Serialized BasicAuth exceeded 2 GiB limit"); - } - return out; + return SerializeToProtoString("BasicAuth", *this); } 
//------------------------------------------------------------

From 1fce29353e7db3a1d91c5e624de4af7610bb6d52 Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Mon, 15 Jul 2024 12:30:58 -0400
Subject: [PATCH 068/122] GH-41993 [Go] IPC writer shift voffsets when offsets array does not start from zero (#43176)

### Rationale for this change

It should be valid to specify offset buffers that do not start from zero. This is particularly important when multiple arrays share a single value buffer.

### What changes are included in this PR?

- Add condition to shift offsets buffer when it does not start from zero
- Test to reproduce failure and then validate fix

### Are these changes tested?

Yes

### Are there any user-facing changes?

Variable-length binary arrays that share a value buffer will not result in errors.
* GitHub Issue: #41993

Authored-by: Joel Lubinitsky
Signed-off-by: Joel Lubinitsky
---
 go/arrow/ipc/ipc_test.go | 68 ++++++++++++++++++++++++++++++++++++++++
 go/arrow/ipc/writer.go | 22 +++++++++++--
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/go/arrow/ipc/ipc_test.go b/go/arrow/ipc/ipc_test.go
index 6348eff2e8393..17499c45b2059 100644
--- a/go/arrow/ipc/ipc_test.go
+++ b/go/arrow/ipc/ipc_test.go
@@ -31,6 +31,7 @@ import (
 	"github.com/apache/arrow/go/v17/arrow"
 	"github.com/apache/arrow/go/v17/arrow/array"
+	"github.com/apache/arrow/go/v17/arrow/bitutil"
 	"github.com/apache/arrow/go/v17/arrow/ipc"
 	"github.com/apache/arrow/go/v17/arrow/memory"
 )
@@ -620,3 +621,70 @@ func TestIpcEmptyMap(t *testing.T) {
 	assert.Zero(t, r.Record().NumRows())
 	assert.True(t, arrow.TypeEqual(dt, r.Record().Column(0).DataType()))
 }
+
+// GH-41993
+func TestArrowBinaryIPCWriterTruncatedVOffsets(t *testing.T) {
+	var buf bytes.Buffer
+	buf.WriteString("apple")
+	buf.WriteString("pear")
+	buf.WriteString("banana")
+	values := buf.Bytes()
+
+	offsets := []int32{5, 9, 15} // <-- only "pear" and "banana"
+	voffsets := arrow.Int32Traits.CastToBytes(offsets)
+
+	validity := []byte{0}
+	bitutil.SetBit(validity, 0)
+	bitutil.SetBit(validity, 1)
+
+	data := array.NewData(
+		arrow.BinaryTypes.String,
+		2, // <-- only "pear" and "banana"
+		[]*memory.Buffer{
+			memory.NewBufferBytes(validity),
+			memory.NewBufferBytes(voffsets),
+			memory.NewBufferBytes(values),
+		},
+		nil,
+		0,
+		0,
+	)
+
+	str := array.NewStringData(data)
+	require.Equal(t, 2, str.Len())
+	require.Equal(t, "pear", str.Value(0))
+	require.Equal(t, "banana", str.Value(1))
+
+	schema := arrow.NewSchema([]arrow.Field{
+		{
+			Name: "string",
+			Type: arrow.BinaryTypes.String,
+			Nullable: true,
+		},
+	}, nil)
+	record := array.NewRecord(schema, []arrow.Array{str}, 2)
+
+	var output bytes.Buffer
+	writer := ipc.NewWriter(&output, ipc.WithSchema(schema))
+
+	require.NoError(t, writer.Write(record))
+	require.NoError(t, writer.Close())
+
+	reader, err := ipc.NewReader(bytes.NewReader(output.Bytes()), ipc.WithSchema(schema))
+	require.NoError(t, err)
+	defer reader.Release()
+
+	require.True(t, reader.Next())
+	require.NoError(t, reader.Err())
+
+	rec := reader.Record()
+	require.EqualValues(t, 1, rec.NumCols())
+	require.EqualValues(t, 2, rec.NumRows())
+
+	col, ok := rec.Column(0).(*array.String)
+	require.True(t, ok)
+	require.Equal(t, "pear", col.Value(0))
+	require.Equal(t, "banana", col.Value(1))
+
+	require.False(t, reader.Next())
+}
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index f2afd2db4274c..63c0b612670b0 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -853,19 +853,35 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer
 		return nil
 	}

+	dataTypeWidth := arr.DataType().Layout().Buffers[1].ByteWidth
+
 	// if we have a non-zero offset, then the value offsets do not start at
 	// zero. we must a) create a new offsets array with shifted offsets and
 	// b) slice the values array accordingly
-	//
+	hasNonZeroOffset := data.Offset() != 0
+
 	// or if there are more value offsets than values (the array has been sliced)
 	// we need to trim off the trailing offsets
-	needsTruncateAndShift := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len()
+	hasMoreOffsetsThanValues := offsetBytesNeeded < voffsets.Len()
+
+	// or if the offsets do not start from the zero index, we need to shift them
+	// and slice the values array
+	var firstOffset int64
+	if dataTypeWidth == 8 {
+		firstOffset = arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[0]
+	} else {
+		firstOffset = int64(arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[0])
+	}
+	offsetsDoNotStartFromZero := firstOffset != 0
+
+	// determine whether the offsets array should be shifted
+	needsTruncateAndShift := hasNonZeroOffset || hasMoreOffsetsThanValues || offsetsDoNotStartFromZero

 	if needsTruncateAndShift {
 		shiftedOffsets := memory.NewResizableBuffer(w.mem)
 		shiftedOffsets.Resize(offsetBytesNeeded)

-		switch arr.DataType().Layout().Buffers[1].ByteWidth {
+		switch dataTypeWidth {
 		case 8:
 			dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes())
 			offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1]

From 7d542e4287d43e241743de963f9d93a90906624d Mon Sep 17 00:00:00 2001
From: Laurent Goujon
Date: Mon, 15 Jul 2024 20:39:32 +0200
Subject: [PATCH 069/122] GH-43217: [Java] Remove flight-core shaded jars (#43224)

### Rationale for this change

Remove extra jars generated by the `flight-core` module:
- they do not seem to be used internally by the project, nor do they appear to serve any external purpose
- they are not compatible with JPMS

### Are these changes tested?

CI/CD

### Are there any user-facing changes?

Several extra jars attached to the `org.apache.arrow:flight-core` artifact will stop being published to the Apache Maven Repository/Maven Central.
* GitHub Issue: #43217

Authored-by: Laurent Goujon
Signed-off-by: Dane Pitkin
---
 dev/tasks/tasks.yml | 3 --
 java/flight/flight-core/pom.xml | 82 ---------------------------------
 2 files changed, 85 deletions(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 45417acf856b5..9c7d1ff3bbe12 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -810,10 +810,7 @@ tasks:
       - arrow-vector-{no_rc_snapshot_version}.pom
       - flight-core-{no_rc_snapshot_version}-cyclonedx.json
      - flight-core-{no_rc_snapshot_version}-cyclonedx.xml
-      - flight-core-{no_rc_snapshot_version}-jar-with-dependencies.jar
      - flight-core-{no_rc_snapshot_version}-javadoc.jar
-      - flight-core-{no_rc_snapshot_version}-shaded-ext.jar
-      - flight-core-{no_rc_snapshot_version}-shaded.jar
      - flight-core-{no_rc_snapshot_version}-sources.jar
      - flight-core-{no_rc_snapshot_version}-tests.jar
      - flight-core-{no_rc_snapshot_version}.jar
diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml
index 5f82de2724b1d..3e0f4f8ce1b7d 100644
--- a/java/flight/flight-core/pom.xml
+++ b/java/flight/flight-core/pom.xml
@@ -155,71 +155,6 @@ under the License.
- - org.apache.maven.plugins - maven-shade-plugin - - - shade-main - - shade - - package - - false - true - shaded - - - io.grpc:* - com.google.protobuf:* - - - - - com.google.protobuf - arrow.flight.com.google.protobuf - - - - - - - - - shade-ext - - shade - - package - - false - true - shaded-ext - - - io.grpc:* - com.google.protobuf:* - com.google.guava:* - - - - - com.google.protobuf - arrow.flight.com.google.protobuf - - - com.google.common - arrow.flight.com.google.common - - - - - - - - - org.xolstice.maven.plugins protobuf-maven-plugin @@ -261,23 +196,6 @@ under the License. - - maven-assembly-plugin - - - jar-with-dependencies - - - - - make-assembly - - single - - package - - - From 1aa727fb391c0613703bc1bf9f6db502f3ef4797 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:38:36 +0900 Subject: [PATCH 070/122] MINOR: [CI] Bump actions/setup-python from 5.1.0 to 5.1.1 (#43261) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5.1.0 to 5.1.1.
Release notes

Sourced from actions/setup-python's releases.

v5.1.1

What's Changed

Bug fixes:

  • fix(ci): update all failing workflows by @mayeut in actions/setup-python#863. This update ensures compatibility and optimal performance of workflows on the latest macOS version.

Documentation changes, dependency updates, new contributors, and the commit list are available in the full changelog: https://github.com/actions/setup-python/compare/v5...v5.1.1
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- .github/workflows/archery.yml | 2 +- .github/workflows/comment_bot.yml | 2 +- .github/workflows/cpp.yml | 4 ++-- .github/workflows/csharp.yml | 2 +- .github/workflows/dev.yml | 4 ++-- .github/workflows/docs.yml | 2 +- .github/workflows/docs_light.yml | 2 +- .github/workflows/go.yml | 6 +++--- .github/workflows/integration.yml | 2 +- .github/workflows/java.yml | 2 +- .github/workflows/java_jni.yml | 4 ++-- .github/workflows/java_nightly.yml | 2 +- .github/workflows/js.yml | 2 +- .github/workflows/pr_bot.yml | 2 +- .github/workflows/python.yml | 4 ++-- .github/workflows/r.yml | 4 ++-- .github/workflows/r_nightly.yml | 2 +- .github/workflows/ruby.yml | 2 +- 18 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index c698baba2c816..87f365b9065c8 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -59,7 +59,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.1.1 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index a34856d2dc81a..1138c0a02f812 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -41,7 +41,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index e539fadb859fe..eff0b0204e6bd 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -246,7 +246,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.1.1 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -467,7 +467,7 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.1.1 with: python-version: 3.9 - name: Install Google Cloud Storage Testbench diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index e4db9f482e206..c9dd73bbd85f0 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -108,7 +108,7 @@ jobs: with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Checkout Arrow diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 5aec3638a8967..aeaa723e55509 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -46,7 +46,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Install pre-commit @@ -105,7 
+105,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: '3.12' - name: Install Ruby diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 36a0dc014db8d..b6075746ff40a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -53,7 +53,7 @@ jobs: key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 947e2ac21b83c..f66e8473e2516 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -59,7 +59,7 @@ jobs: key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index c247a89128b34..0d369d252b56c 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -213,7 +213,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery @@ -253,7 +253,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery @@ -345,7 +345,7 @@ jobs: github.event_name == 'push' && github.repository == 'apache/arrow' && github.ref_name == 'main' - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: '3.10' - name: Run Benchmarks diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f53f4aeb505d2..46d422a53ae69 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -90,7 +90,7 @@ jobs: key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 08dbe7c8068c0..8eb2682dc077d 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -77,7 +77,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index ea5f8d694a9c6..533da7c36be34 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ 
-71,7 +71,7 @@ jobs: key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} restore-keys: java-jni-manylinux-2014- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery @@ -111,7 +111,7 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index f40d4ce5b42d6..72afb6dbf1c1d 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -58,7 +58,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index c11c8254011f6..ad22968a3a68b 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -55,7 +55,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index e589610f536b3..7dd06b6aeec09 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a568f8346e7fc..daadd971f8ac8 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -102,7 +102,7 @@ jobs: key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery @@ -172,7 +172,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.1.1 with: python-version: '3.11' - name: Install Dependencies diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index e8f57db99c28c..0ff7266860f29 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -144,7 +144,7 @@ jobs: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}- ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery @@ -204,7 +204,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: 
actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index af5382f90834c..1ec071b6bbb5e 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,7 +60,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 6a29ec8e72cab..ca2305a7f9357 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -84,7 +84,7 @@ jobs: key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 with: python-version: 3.8 - name: Setup Archery From 6845bb64e4e4da42b146ca5227db0a1c4ee33f94 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:39:03 +0900 Subject: [PATCH 071/122] MINOR: [CI] Bump actions/setup-dotnet from 4.0.0 to 4.0.1 (#43262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-dotnet](https://github.com/actions/setup-dotnet) from 4.0.0 to 4.0.1.
Release notes (sourced from actions/setup-dotnet's releases): v4.0.1. What's Changed: documentation changes, dependency updates, new contributors. Full Changelog: https://github.com/actions/setup-dotnet/compare/v4...v4.0.1 (commit list elided).
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-dotnet&package-manager=github_actions&previous-version=4.0.0&new-version=4.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- .github/workflows/csharp.yml | 6 +++--- .github/workflows/dev.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index c9dd73bbd85f0..6e8548dc960f4 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -49,7 +49,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout Arrow @@ -77,7 +77,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout Arrow @@ -104,7 +104,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index aeaa723e55509..49568102e11f8 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -113,7 +113,7 @@ jobs: with: ruby-version: ruby - name: Install .NET - uses: actions/setup-dotnet@4d6c8fcf3c8f7a60068d26b594648e99df24cee3 # v4.0.0 + uses: actions/setup-dotnet@6bd8b7f7774af54e05809fcc5431931b3eb1ddee # v4.0.1 with: dotnet-version: '8.0.x' - name: Install Dependencies From 21238a7f5ebb224410d84ecf32dbe3556cab363a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 15 Jul 2024 23:25:47 -0300 Subject: [PATCH 072/122] GH-41481: [CI] Update how extra environment variables are specified for the integration test docker job (#42009) ### Rationale for this change Currently, nanoarrow and Rust are not being included in the integration tests. The command issued by archery includes multiple environment variable definitions and the rightmost ones disable the extra environment variables. https://github.com/apache/arrow/actions/runs/9397807525/job/25881776553#step:9:353 ``` DEBUG:archery:Executing `['docker', 'run', '--rm', '-e', 'ARCHERY_DEFAULT_BRANCH=main', '-e', 'ARCHERY_INTEGRATION_WITH_NANOARROW=1', '-e', 'ARCHERY_INTEGRATION_WITH_RUST=1', '-e', 'ARCHERY_INTEGRATION_WITH_NANOARROW=0', '-e', 'ARCHERY_INTEGRATION_WITH_RUST=0', '-e', 'ARROW_CPP_EXE_PATH=/build/cpp/debug', '-e', 'ARROW_NANOARROW_PATH=/build/nanoarrow', '-e', 'ARROW_RUST_EXE_PATH=/build/rust/debug', '-e', 'CCACHE_COMPILERCHECK=content', '-e', 'CCACHE_COMPRESS=1', '-e', 'CCACHE_COMPRESSLEVEL=6', '-e', 'CCACHE_DIR=/ccache', '-e', 'CCACHE_MAXSIZE=1G', '-e', 'GITHUB_ACTIONS=true', '-v', '/home/runner/work/arrow/arrow:/arrow', '-v', '/home/runner/work/arrow/arrow/.docker/conda-ccache:/ccache', 'apache/arrow-dev:amd64-conda-integration', '/arrow/ci/scripts/integration_arrow_build.sh /arrow /build && /arrow/ci/scripts/integration_arrow.sh /arrow /build']` # ... + /arrow/ci/scripts/rust_build.sh /arrow /build ===================================================================== Not building the Rust implementation. ===================================================================== + /arrow/ci/scripts/nanoarrow_build.sh /arrow /build ===================================================================== Not building nanoarrow ===================================================================== ``` ### What changes are included in this PR? 
This PR updates how environment variables are specified such that the intended value is passed to the docker build. ### Are these changes tested? Yes ### Are there any user-facing changes? No * GitHub Issue: #41481 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/docker/conda-integration.dockerfile | 4 +++- dev/archery/archery/docker/core.py | 25 ++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 30b9cd5199fab..78d2503b23df7 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -44,8 +44,10 @@ RUN mamba install -q -y \ # Install Rust with only the needed components # (rustfmt is needed for tonic-build to compile the protobuf definitions) +# GH-41637: Version pinned at 1.77 because the glibc for conda-cpp is currently too old RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ - $HOME/.cargo/bin/rustup toolchain install stable && \ + $HOME/.cargo/bin/rustup override set 1.77 && \ + $HOME/.cargo/bin/rustup toolchain install 1.77 && \ $HOME/.cargo/bin/rustup component add rustfmt ENV GOROOT=/opt/go \ diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index cb831060022a4..5be4887ea4f63 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -340,18 +340,9 @@ def run(self, service_name, command=None, *, env=None, volumes=None, service = self.config.get(service_name) args = [] - if user is not None: - args.extend(['-u', user]) - - if env is not None: - for k, v in env.items(): - args.extend(['-e', '{}={}'.format(k, v)]) - if volumes is not None: - for volume in volumes: - args.extend(['--volume', volume]) - - if self.config.using_docker or service['need_gpu'] or resource_limit: + use_docker = self.config.using_docker or service['need_gpu'] or resource_limit + if use_docker: # use gpus, requires docker>=19.03 if service['need_gpu']: args.extend(['--gpus', 'all']) @@ -392,6 +383,18 @@ def run(self, service_name, command=None, *, env=None, volumes=None, args.append(f'--memory={memory}') args.append(f'--memory-swap={memory}') + if user is not None: + args.extend(['-u', user]) + + if env is not None: + for k, v in env.items(): + args.extend(['-e', '{}={}'.format(k, v)]) + + if volumes is not None: + for volume in volumes: + args.extend(['--volume', volume]) + + if use_docker: # get the actual docker image name instead of the compose service # name which we refer as image in general args.append(service['image']) From a43950ace8cd67e856e9aa66e39d19f5b208f0ac Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 04:49:07 -0700 Subject: [PATCH 073/122] MINOR: [C#] Bump xunit.runner.visualstudio from 2.8.1 to 2.8.2 in /csharp (#43260) Bumps xunit.runner.visualstudio from 2.8.1 to 2.8.2. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit.runner.visualstudio&package-manager=nuget&previous-version=2.8.1&new-version=2.8.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. 
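(Editorial aside on GH-41481 above, not part of the patch: `docker run` keeps the rightmost value when the same variable is passed with `-e` more than once, which is why the duplicated `ARCHERY_INTEGRATION_WITH_*` definitions in the logged command disabled the Rust and nanoarrow builds. A minimal sketch of that last-one-wins behavior and of merging the mappings before emitting flags; the function and variable names are illustrative, not archery's actual API.)

```python
def docker_env_args(*env_mappings):
    """Merge env mappings left to right, then emit one -e flag per key."""
    merged = {}
    for env in env_mappings:
        merged.update(env)  # later mappings win, mirroring docker's behavior
    args = []
    for key, value in merged.items():
        args.extend(["-e", f"{key}={value}"])
    return args

compose_defaults = {"ARCHERY_INTEGRATION_WITH_RUST": "0"}  # service default
job_overrides = {"ARCHERY_INTEGRATION_WITH_RUST": "1"}     # what the job asked for

# Naive concatenation would pass both definitions and docker would keep "0";
# merging first guarantees a single, explicit definition per variable.
print(docker_env_args(compose_defaults, job_overrides))
# ['-e', 'ARCHERY_INTEGRATION_WITH_RUST=1']
```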
You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index bd97372d1021b..e047bd61a0821 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -9,7 +9,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 5a5a92ccd2c7f..d1974a7838e7a 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 132f17fa212a5..61e6c06f451c5 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index a3290e3be14ee..3dbe34955e6d4 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -18,7 +18,7 @@ - + all runtime; build; native; contentfiles; analyzers From 36fe1daaca6322224ce81dca6a671d434e0106de Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 16 Jul 2024 14:06:47 +0200 Subject: [PATCH 074/122] GH-43254: [C++] Always prefer mimalloc to jemalloc (#40875) ### Rationale for this change As discussed [on the mailing-list](https://lists.apache.org/thread/dts9ggvkthczfpmd25wrz449mxod76o2), this PR switches the default memory pool to mimalloc for all platforms. This should have several desirable effects: * less variability between platforms * mimalloc generally has a nicer, more consistent API and is easier to work with (in particular, jemalloc's configuration scheme is slightly abstruse) * potentially better performance, or at least not significantly worse, than the status quo ### Are these changes tested? Yes, by existing CI configurations. ### Are there any user-facing changes? Behavior should not change. Performance characteristics of some user workloads might improve or regress, but this is something we cannot predict in advance.
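(Editorial aside, not part of the patch: the reordering is observable from Python. A minimal sketch, assuming a pyarrow build with mimalloc enabled; only documented pyarrow calls are used, and the printed backend depends on how your wheel was built.)

```python
import pyarrow as pa

# The default pool follows the compiled-in preference order:
# mimalloc if available, then jemalloc, then the system allocator.
pool = pa.default_memory_pool()
print(pool.backend_name)  # expected 'mimalloc' on builds with mimalloc enabled

# Allocations from the default pool are tracked globally.
buf = pa.allocate_buffer(1 << 20)
print(pa.total_allocated_bytes() >= (1 << 20))  # True while buf is alive
```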
* GitHub Issue: #43254 Lead-authored-by: Antoine Pitrou Co-authored-by: Sutou Kouhei Signed-off-by: Antoine Pitrou --- cpp/src/arrow/memory_pool.cc | 18 ++++++++---------- dev/archery/archery/benchmark/runner.py | 2 ++ dev/tasks/linux-packages/github.linux.yml | 2 +- docs/source/cpp/memory.rst | 6 +++--- docs/source/python/memory.rst | 8 ++++---- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 2f8ce3a6fa8c7..1e855311a98ed 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -85,19 +85,17 @@ struct SupportedBackend { const std::vector& SupportedBackends() { static std::vector backends = { - // ARROW-12316: Apple => mimalloc first, then jemalloc - // non-Apple => jemalloc first, then mimalloc -#if defined(ARROW_JEMALLOC) && !defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, -#endif + // mimalloc is our preferred allocator for several reasons: + // 1) it has good performance + // 2) it is well-supported on all our main platforms (Linux, macOS, Windows) + // 3) it is easy to configure and has a consistent API. #ifdef ARROW_MIMALLOC - {"mimalloc", MemoryPoolBackend::Mimalloc}, + {"mimalloc", MemoryPoolBackend::Mimalloc}, #endif -#if defined(ARROW_JEMALLOC) && defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, +#ifdef ARROW_JEMALLOC + {"jemalloc", MemoryPoolBackend::Jemalloc}, #endif - {"system", MemoryPoolBackend::System} - }; + {"system", MemoryPoolBackend::System}}; return backends; } diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index a91989fb95257..9ebb9226e3743 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -123,6 +123,8 @@ def default_configuration(**kwargs): with_csv=True, with_dataset=True, with_json=True, + with_jemalloc=True, + with_mimalloc=True, with_parquet=True, with_python=False, with_brotli=True, diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml index 9e24835b8b627..891682c4358d8 100644 --- a/dev/tasks/linux-packages/github.linux.yml +++ b/dev/tasks/linux-packages/github.linux.yml @@ -64,7 +64,7 @@ jobs: run: | set -e pushd arrow/dev/tasks/linux-packages - rake version:update + rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" rake docker:pull || : rake --trace {{ task_namespace }}:build BUILD_DIR=build popd diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst index 33907b5580f61..032b7d1ac90f1 100644 --- a/docs/source/cpp/memory.rst +++ b/docs/source/cpp/memory.rst @@ -139,9 +139,9 @@ Default Memory Pool The default memory pool depends on how Arrow C++ was compiled: -- if enabled at compile time, a `jemalloc `_ heap; -- otherwise, if enabled at compile time, a - `mimalloc `_ heap; +- if enabled at compile time, a `mimalloc `_ + heap; +- otherwise, if enabled at compile time, a `jemalloc `_ heap; - otherwise, the C library ``malloc`` heap. Overriding the Default Memory Pool diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 7b49d48ab20fa..029d30cc1b693 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -110,12 +110,12 @@ the buffer is garbage-collected, all of the memory is freed: pa.total_allocated_bytes() Besides the default built-in memory pool, there may be additional memory pools -to choose (such as `mimalloc `_) -from depending on how Arrow was built. 
One can get the backend -name for a memory pool:: +to choose from (such as `jemalloc `_) +depending on how Arrow was built. One can get the backend name for a memory +pool:: >>> pa.default_memory_pool().backend_name - 'jemalloc' + 'mimalloc' .. seealso:: :ref:`API documentation for memory pools `. From 12f68fca055c6301947fa29c72cda13a9360e054 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Tue, 16 Jul 2024 20:37:05 +0800 Subject: [PATCH 075/122] GH-43209: [C++] Add lint for DCHECK in public headers (#43248) ### Rationale for this change I raised my question in #43209 about which I have always been curious. The top answer makes a good sense and after some searching in the code base, I found more evidence like: https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/public_api_test.cc#L67-L71 So I'm making the following changes to `DCHECK` macro family. ### What changes are included in this PR? 1. Add lint rule for `DCHECK` usage in public headers; 2. Cleanup exisiting `DCHECK` usages in public (probably non-public, but at least not named with `internal`) headers; 3. Add `ifdef` protection for `DCHECK` definition like we did for `ASSIGN_OR_RAISE` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/result_internal.h#L20-L22) and `RETURN_NOT_OK` (https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/status.h#L80-L82), to not mess up user code that directly includes `arrow/util/logging.h`. 4. Add comments as guideline. ### Are these changes tested? No test needed. ### Are there any user-facing changes? Probably not? * GitHub Issue: #43209 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/build-support/lint_cpp_cli.py | 11 +++++------ cpp/src/arrow/acero/asof_join_node.cc | 2 +- cpp/src/arrow/acero/sorted_merge_node.cc | 2 +- ..._table.h => unmaterialized_table_internal.h} | 0 cpp/src/arrow/acero/util.h | 2 +- ...ream_utils.h => bit_stream_utils_internal.h} | 0 cpp/src/arrow/util/bit_util_test.cc | 2 +- cpp/src/arrow/util/logging.h | 17 +++++++++++++++++ .../{rle_encoding.h => rle_encoding_internal.h} | 2 +- cpp/src/arrow/util/rle_encoding_test.cc | 4 ++-- cpp/src/arrow/util/tdigest.h | 2 +- cpp/src/arrow/util/vector.h | 8 ++++---- cpp/src/gandiva/dex_visitor.h | 2 +- cpp/src/gandiva/engine.h | 2 +- cpp/src/gandiva/eval_batch.h | 12 ++++++------ cpp/src/gandiva/llvm_types.h | 2 +- cpp/src/gandiva/local_bitmaps_holder.h | 2 +- cpp/src/gandiva/selection_vector_impl.h | 2 +- cpp/src/parquet/bloom_filter.h | 4 ++-- cpp/src/parquet/column_reader.cc | 4 ++-- cpp/src/parquet/column_writer.cc | 4 ++-- cpp/src/parquet/encoding.cc | 4 ++-- cpp/src/parquet/level_conversion_inc.h | 2 +- 23 files changed, 54 insertions(+), 38 deletions(-) rename cpp/src/arrow/acero/{unmaterialized_table.h => unmaterialized_table_internal.h} (100%) rename cpp/src/arrow/util/{bit_stream_utils.h => bit_stream_utils_internal.h} (100%) rename cpp/src/arrow/util/{rle_encoding.h => rle_encoding_internal.h} (99%) diff --git a/cpp/build-support/lint_cpp_cli.py b/cpp/build-support/lint_cpp_cli.py index a0eb8f0efe6d5..47abd53fe925d 100755 --- a/cpp/build-support/lint_cpp_cli.py +++ b/cpp/build-support/lint_cpp_cli.py @@ -31,6 +31,7 @@ _NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*') _RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*') _ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*') +_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*') def _paths(paths): @@ -54,14 +55,12 @@ def lint_file(path): (lambda x: 
re.match(_RETURN_NOT_OK_REGEX, x), 'Use ARROW_RETURN_NOT_OK in header files', _paths('''\ arrow/status.h - test - arrow/util/hash.h arrow/python/util''')), (lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x), - 'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\ - arrow/result_internal.h - test - ''')) + 'Use ARROW_ASSIGN_OR_RAISE in header files', []), + (lambda x: re.match(_DCHECK_REGEX, x), + 'Use ARROW_DCHECK in header files', _paths('''\ + arrow/util/logging.h''')) ] diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 848cbdf7506ad..2248362241cd7 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -32,7 +32,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #ifndef NDEBUG #include "arrow/acero/options_internal.h" #endif diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index a71ac79efcc46..2845383cee982 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -28,7 +28,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/query_context.h" #include "arrow/acero/time_series_util.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #include "arrow/acero/util.h" #include "arrow/array/builder_base.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/acero/unmaterialized_table.h b/cpp/src/arrow/acero/unmaterialized_table_internal.h similarity index 100% rename from cpp/src/arrow/acero/unmaterialized_table.h rename to cpp/src/arrow/acero/unmaterialized_table_internal.h diff --git a/cpp/src/arrow/acero/util.h b/cpp/src/arrow/acero/util.h index 0eb9f4c87e180..ee46e8527422a 100644 --- a/cpp/src/arrow/acero/util.h +++ b/cpp/src/arrow/acero/util.h @@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter { // return true if the counter is complete bool Increment() { - DCHECK_NE(count_.load(), total_.load()); + ARROW_DCHECK_NE(count_.load(), total_.load()); int count = count_.fetch_add(1) + 1; if (count != total_.load()) return false; return DoneOnce(); diff --git a/cpp/src/arrow/util/bit_stream_utils.h b/cpp/src/arrow/util/bit_stream_utils_internal.h similarity index 100% rename from cpp/src/arrow/util/bit_stream_utils.h rename to cpp/src/arrow/util/bit_stream_utils_internal.h diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index e026dfec24065..c7674af57f167 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -43,7 +43,7 @@ #include "arrow/testing/util.h" #include "arrow/type_fwd.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bitmap.h" #include "arrow/util/bitmap_generate.h" #include "arrow/util/bitmap_ops.h" diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 2a2175ec0fc72..be73c020c07f8 100644 --- a/cpp/src/arrow/util/logging.h +++ b/cpp/src/arrow/util/logging.h @@ -138,14 +138,31 @@ enum class ArrowLogLevel : int { #endif // NDEBUG +// These are internal-use macros and should not be used in public headers. 
+#ifndef DCHECK #define DCHECK ARROW_DCHECK +#endif +#ifndef DCHECK_OK #define DCHECK_OK ARROW_DCHECK_OK +#endif +#ifndef DCHECK_EQ #define DCHECK_EQ ARROW_DCHECK_EQ +#endif +#ifndef DCHECK_NE #define DCHECK_NE ARROW_DCHECK_NE +#endif +#ifndef DCHECK_LE #define DCHECK_LE ARROW_DCHECK_LE +#endif +#ifndef DCHECK_LT #define DCHECK_LT ARROW_DCHECK_LT +#endif +#ifndef DCHECK_GE #define DCHECK_GE ARROW_DCHECK_GE +#endif +#ifndef DCHECK_GT #define DCHECK_GT ARROW_DCHECK_GT +#endif // This code is adapted from // https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h. diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding_internal.h similarity index 99% rename from cpp/src/arrow/util/rle_encoding.h rename to cpp/src/arrow/util/rle_encoding_internal.h index e0f5690062a04..4575320659706 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -27,7 +27,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 26984e5f7735d..0cc0a276a25f4 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -28,10 +28,10 @@ #include "arrow/buffer.h" #include "arrow/testing/random.h" #include "arrow/type.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/io_util.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" namespace arrow { namespace util { diff --git a/cpp/src/arrow/util/tdigest.h b/cpp/src/arrow/util/tdigest.h index 308df468840eb..ea033ed696d1b 100644 --- a/cpp/src/arrow/util/tdigest.h +++ b/cpp/src/arrow/util/tdigest.h @@ -56,7 +56,7 @@ class ARROW_EXPORT TDigest { // this function is intensively called and performance critical // call it only if you are sure no NAN exists in input data void Add(double value) { - DCHECK(!std::isnan(value)) << "cannot add NAN"; + ARROW_DCHECK(!std::isnan(value)) << "cannot add NAN"; if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) { MergeInput(); } diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h index 74b6a2403a2bb..e77d713a44d01 100644 --- a/cpp/src/arrow/util/vector.h +++ b/cpp/src/arrow/util/vector.h @@ -31,8 +31,8 @@ namespace internal { template std::vector DeleteVectorElement(const std::vector& values, size_t index) { - DCHECK(!values.empty()); - DCHECK_LT(index, values.size()); + ARROW_DCHECK(!values.empty()); + ARROW_DCHECK_LT(index, values.size()); std::vector out; out.reserve(values.size() - 1); for (size_t i = 0; i < index; ++i) { @@ -47,7 +47,7 @@ std::vector DeleteVectorElement(const std::vector& values, size_t index) { template std::vector AddVectorElement(const std::vector& values, size_t index, T new_element) { - DCHECK_LE(index, values.size()); + ARROW_DCHECK_LE(index, values.size()); std::vector out; out.reserve(values.size() + 1); for (size_t i = 0; i < index; ++i) { @@ -63,7 +63,7 @@ std::vector AddVectorElement(const std::vector& values, size_t index, template std::vector ReplaceVectorElement(const std::vector& values, size_t index, T new_element) { - DCHECK_LE(index, values.size()); + ARROW_DCHECK_LE(index, values.size()); std::vector out; out.reserve(values.size()); for 
(size_t i = 0; i < index; ++i) { diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h index 5d160bb22ca68..4115df7ffb22b 100644 --- a/cpp/src/gandiva/dex_visitor.h +++ b/cpp/src/gandiva/dex_visitor.h @@ -70,7 +70,7 @@ class GANDIVA_EXPORT DexVisitor { /// Default implementation with only DCHECK(). #define VISIT_DCHECK(DEX_CLASS) \ - void Visit(const DEX_CLASS& dex) override { DCHECK(0); } + void Visit(const DEX_CLASS& dex) override { ARROW_DCHECK(0); } class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor { VISIT_DCHECK(VectorReadValidityDex) diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 565c3f142502d..3a69500e38bcf 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -67,7 +67,7 @@ class GANDIVA_EXPORT Engine { /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. void AddFunctionToCompile(const std::string& fname) { - DCHECK(!module_finalized_); + ARROW_DCHECK(!module_finalized_); functions_to_compile_.push_back(fname); } diff --git a/cpp/src/gandiva/eval_batch.h b/cpp/src/gandiva/eval_batch.h index 9644010b721af..feb4cdc975005 100644 --- a/cpp/src/gandiva/eval_batch.h +++ b/cpp/src/gandiva/eval_batch.h @@ -53,22 +53,22 @@ class EvalBatch { int GetNumBuffers() const { return num_buffers_; } const uint8_t* GetBuffer(int idx) const { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffers_array_.get())[idx]; } uint8_t* GetBuffer(int idx) { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffers_array_.get())[idx]; } int64_t GetBufferOffset(int idx) const { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); return (buffer_offsets_array_.get())[idx]; } void SetBuffer(int idx, uint8_t* buffer, int64_t offset) { - DCHECK(idx <= num_buffers_); + ARROW_DCHECK(idx <= num_buffers_); (buffers_array_.get())[idx] = buffer; (buffer_offsets_array_.get())[idx] = offset; } @@ -80,11 +80,11 @@ class EvalBatch { } const uint8_t* GetLocalBitMap(int idx) const { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_holder_->GetLocalBitMap(idx); } uint8_t* GetLocalBitMap(int idx) { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_holder_->GetLocalBitMap(idx); } diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index d6f0952713efc..7768a7f7e4bde 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -97,7 +97,7 @@ class GANDIVA_EXPORT LLVMTypes { } else if (type->isFloatingPointTy()) { return llvm::ConstantFP::get(type, 0); } else { - DCHECK(type->isPointerTy()); + ARROW_DCHECK(type->isPointerTy()); return llvm::ConstantPointerNull::getNullValue(type); } } diff --git a/cpp/src/gandiva/local_bitmaps_holder.h b/cpp/src/gandiva/local_bitmaps_holder.h index a172fb973c4a5..dc24a32e7cad0 100644 --- a/cpp/src/gandiva/local_bitmaps_holder.h +++ b/cpp/src/gandiva/local_bitmaps_holder.h @@ -40,7 +40,7 @@ class LocalBitMapsHolder { uint8_t** GetLocalBitMapArray() const { return local_bitmaps_array_.get(); } uint8_t* GetLocalBitMap(int idx) const { - DCHECK(idx <= GetNumLocalBitMaps()); + ARROW_DCHECK(idx <= GetNumLocalBitMaps()); return local_bitmaps_array_.get()[idx]; } diff --git a/cpp/src/gandiva/selection_vector_impl.h b/cpp/src/gandiva/selection_vector_impl.h index dc9724ca86fe2..234298daf5748 100644 --- 
a/cpp/src/gandiva/selection_vector_impl.h +++ b/cpp/src/gandiva/selection_vector_impl.h @@ -60,7 +60,7 @@ class SelectionVectorImpl : public SelectionVector { int64_t GetNumSlots() const override { return num_slots_; } void SetNumSlots(int64_t num_slots) override { - DCHECK_LE(num_slots, max_slots_); + ARROW_DCHECK_LE(num_slots, max_slots_); num_slots_ = num_slots; } diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 909563d013fed..82172f363ba7e 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -221,7 +221,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { /// kMaximumBloomFilterBytes, and the return value is always a power of 2 static uint32_t OptimalNumOfBytes(uint32_t ndv, double fpp) { uint32_t optimal_num_of_bits = OptimalNumOfBits(ndv, fpp); - DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits)); + ARROW_DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits)); return optimal_num_of_bits >> 3; } @@ -233,7 +233,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { /// @return it always return a value between kMinimumBloomFilterBytes * 8 and /// kMaximumBloomFilterBytes * 8, and the return value is always a power of 16 static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) { - DCHECK(fpp > 0.0 && fpp < 1.0); + ARROW_DCHECK(fpp > 0.0 && fpp < 1.0); const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8)); uint32_t num_bits; diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ebf9515f27607..05ee6a16c5448 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -36,14 +36,14 @@ #include "arrow/array/builder_primitive.h" #include "arrow/chunked_array.h" #include "arrow/type.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/compression.h" #include "arrow/util/crc32.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/unreachable.h" #include "parquet/column_page.h" #include "parquet/encoding.h" diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index c9f6e482981c0..90e0102b422bb 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -32,7 +32,7 @@ #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" @@ -41,7 +41,7 @@ #include "arrow/util/endian.h" #include "arrow/util/float16.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/type_traits.h" #include "arrow/visit_array_inline.h" #include "parquet/column_page.h" diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 54e1e000040a1..c3f2b79629d9b 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -33,7 +33,7 @@ #include "arrow/type_traits.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" -#include "arrow/util/bit_stream_utils.h" +#include "arrow/util/bit_stream_utils_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" 
#include "arrow/util/bitmap_writer.h" @@ -42,7 +42,7 @@ #include "arrow/util/hashing.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/util/rle_encoding.h" +#include "arrow/util/rle_encoding_internal.h" #include "arrow/util/ubsan.h" #include "arrow/visit_data_inline.h" #include "parquet/exception.h" diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h index d1ccedabfde50..3accb154e6f5a 100644 --- a/cpp/src/parquet/level_conversion_inc.h +++ b/cpp/src/parquet/level_conversion_inc.h @@ -296,7 +296,7 @@ template int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size, int64_t upper_bound_remaining, LevelInfo level_info, ::arrow::internal::FirstTimeBitmapWriter* writer) { - DCHECK_LE(batch_size, kExtractBitsSize); + ARROW_DCHECK_LE(batch_size, kExtractBitsSize); // Greater than level_info.def_level - 1 implies >= the def_level auto defined_bitmap = static_cast( From 18319d30e558af72f72ea58c7d7996e781777643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 16 Jul 2024 16:08:28 +0200 Subject: [PATCH 076/122] MINOR: [Release] Update versions for 18.0.0-SNAPSHOT --- c_glib/meson.build | 2 +- c_glib/tool/generate-version-header.py | 1 + c_glib/vcpkg.json | 2 +- ci/scripts/PKGBUILD | 2 +- cpp/CMakeLists.txt | 2 +- cpp/vcpkg.json | 2 +- csharp/Directory.Build.props | 2 +- .../homebrew-formulae/apache-arrow-glib.rb | 2 +- dev/tasks/homebrew-formulae/apache-arrow.rb | 2 +- docs/source/_static/versions.json | 9 ++++-- docs/source/index.rst | 2 +- go/README.md | 2 +- go/arrow/_examples/helloworld/main.go | 8 ++--- go/arrow/_tools/tmpl/main.go | 2 +- go/arrow/array.go | 4 +-- go/arrow/array/array.go | 6 ++-- go/arrow/array/array_test.go | 10 +++--- go/arrow/array/binary.go | 6 ++-- go/arrow/array/binary_test.go | 6 ++-- go/arrow/array/binarybuilder.go | 8 ++--- go/arrow/array/binarybuilder_test.go | 6 ++-- go/arrow/array/boolean.go | 8 ++--- go/arrow/array/boolean_test.go | 4 +-- go/arrow/array/booleanbuilder.go | 10 +++--- go/arrow/array/booleanbuilder_test.go | 6 ++-- go/arrow/array/bufferbuilder.go | 8 ++--- go/arrow/array/bufferbuilder_byte.go | 2 +- go/arrow/array/bufferbuilder_numeric.gen.go | 6 ++-- .../array/bufferbuilder_numeric.gen.go.tmpl | 6 ++-- go/arrow/array/bufferbuilder_numeric_test.go | 4 +-- go/arrow/array/builder.go | 8 ++--- go/arrow/array/builder_test.go | 4 +-- go/arrow/array/compare.go | 6 ++-- go/arrow/array/compare_test.go | 10 +++--- go/arrow/array/concat.go | 14 ++++---- go/arrow/array/concat_test.go | 10 +++--- go/arrow/array/data.go | 6 ++-- go/arrow/array/data_test.go | 4 +-- go/arrow/array/decimal128.go | 12 +++---- go/arrow/array/decimal128_test.go | 8 ++--- go/arrow/array/decimal256.go | 12 +++---- go/arrow/array/decimal256_test.go | 8 ++--- go/arrow/array/decimal_test.go | 12 +++---- go/arrow/array/dictionary.go | 20 ++++++------ go/arrow/array/dictionary_test.go | 14 ++++---- go/arrow/array/diff.go | 2 +- go/arrow/array/diff_test.go | 10 +++--- go/arrow/array/encoded.go | 12 +++---- go/arrow/array/encoded_test.go | 8 ++--- go/arrow/array/extension.go | 6 ++-- go/arrow/array/extension_test.go | 8 ++--- go/arrow/array/fixed_size_list.go | 10 +++--- go/arrow/array/fixed_size_list_test.go | 6 ++-- go/arrow/array/fixedsize_binary.go | 4 +-- go/arrow/array/fixedsize_binary_test.go | 6 ++-- go/arrow/array/fixedsize_binarybuilder.go | 8 ++--- .../array/fixedsize_binarybuilder_test.go | 4 +-- go/arrow/array/float16.go | 6 ++-- 
go/arrow/array/float16_builder.go | 12 +++---- go/arrow/array/float16_builder_test.go | 6 ++-- go/arrow/array/interval.go | 10 +++--- go/arrow/array/interval_test.go | 6 ++-- go/arrow/array/json_reader.go | 8 ++--- go/arrow/array/json_reader_test.go | 6 ++-- go/arrow/array/list.go | 10 +++--- go/arrow/array/list_test.go | 6 ++-- go/arrow/array/map.go | 6 ++-- go/arrow/array/map_test.go | 6 ++-- go/arrow/array/null.go | 8 ++--- go/arrow/array/null_test.go | 6 ++-- go/arrow/array/numeric.gen.go | 4 +-- go/arrow/array/numeric.gen.go.tmpl | 4 +-- go/arrow/array/numeric_test.go | 10 +++--- go/arrow/array/numericbuilder.gen.go | 10 +++--- go/arrow/array/numericbuilder.gen.go.tmpl | 10 +++--- go/arrow/array/numericbuilder.gen_test.go | 6 ++-- .../array/numericbuilder.gen_test.go.tmpl | 6 ++-- go/arrow/array/record.go | 8 ++--- go/arrow/array/record_test.go | 6 ++-- go/arrow/array/string.go | 6 ++-- go/arrow/array/string_test.go | 8 ++--- go/arrow/array/struct.go | 10 +++--- go/arrow/array/struct_test.go | 6 ++-- go/arrow/array/table.go | 4 +-- go/arrow/array/table_test.go | 6 ++-- go/arrow/array/timestamp.go | 10 +++--- go/arrow/array/timestamp_test.go | 6 ++-- go/arrow/array/union.go | 12 +++---- go/arrow/array/union_test.go | 6 ++-- go/arrow/array/util.go | 10 +++--- go/arrow/array/util_test.go | 14 ++++---- go/arrow/arrio/arrio.go | 2 +- go/arrow/arrio/arrio_test.go | 10 +++--- go/arrow/avro/avro2parquet/main.go | 8 ++--- go/arrow/avro/reader.go | 8 ++--- go/arrow/avro/reader_test.go | 2 +- go/arrow/avro/reader_types.go | 12 +++---- go/arrow/avro/schema.go | 8 ++--- go/arrow/avro/schema_test.go | 2 +- go/arrow/bitutil/bitmaps.go | 6 ++-- go/arrow/bitutil/bitmaps_test.go | 4 +-- go/arrow/bitutil/bitutil.go | 2 +- go/arrow/bitutil/bitutil_test.go | 4 +-- go/arrow/cdata/cdata.go | 8 ++--- go/arrow/cdata/cdata_exports.go | 10 +++--- go/arrow/cdata/cdata_test.go | 12 +++---- go/arrow/cdata/cdata_test_framework.go | 8 ++--- go/arrow/cdata/exports.go | 4 +-- go/arrow/cdata/import_allocator.go | 2 +- go/arrow/cdata/interface.go | 8 ++--- go/arrow/cdata/test/test_cimport.go | 8 ++--- go/arrow/compute/arithmetic.go | 12 +++---- go/arrow/compute/arithmetic_test.go | 20 ++++++------ go/arrow/compute/cast.go | 10 +++--- go/arrow/compute/cast_test.go | 20 ++++++------ go/arrow/compute/datum.go | 6 ++-- go/arrow/compute/example_test.go | 10 +++--- go/arrow/compute/exec.go | 6 ++-- go/arrow/compute/exec/kernel.go | 8 ++--- go/arrow/compute/exec/kernel_test.go | 12 +++---- go/arrow/compute/exec/span.go | 10 +++--- go/arrow/compute/exec/span_test.go | 16 +++++----- go/arrow/compute/exec/utils.go | 8 ++--- go/arrow/compute/exec/utils_test.go | 8 ++--- go/arrow/compute/exec_internals_test.go | 14 ++++---- go/arrow/compute/exec_test.go | 12 +++---- go/arrow/compute/executor.go | 16 +++++----- go/arrow/compute/expression.go | 16 +++++----- go/arrow/compute/expression_test.go | 10 +++--- go/arrow/compute/exprs/builders.go | 4 +-- go/arrow/compute/exprs/builders_test.go | 4 +-- go/arrow/compute/exprs/exec.go | 18 +++++------ go/arrow/compute/exprs/exec_internal_test.go | 8 ++--- go/arrow/compute/exprs/exec_test.go | 12 +++---- go/arrow/compute/exprs/extension_types.go | 4 +-- go/arrow/compute/exprs/field_refs.go | 10 +++--- go/arrow/compute/exprs/types.go | 4 +-- go/arrow/compute/fieldref.go | 4 +-- go/arrow/compute/fieldref_hash.go | 2 +- go/arrow/compute/fieldref_test.go | 8 ++--- go/arrow/compute/functions.go | 4 +-- go/arrow/compute/functions_test.go | 4 +-- .../internal/kernels/base_arithmetic.go | 10 +++--- 
.../internal/kernels/base_arithmetic_amd64.go | 6 ++-- .../kernels/base_arithmetic_avx2_amd64.go | 2 +- .../kernels/base_arithmetic_sse4_amd64.go | 2 +- .../kernels/basic_arithmetic_noasm.go | 4 +-- .../compute/internal/kernels/boolean_cast.go | 6 ++-- go/arrow/compute/internal/kernels/cast.go | 6 ++-- .../compute/internal/kernels/cast_numeric.go | 2 +- .../kernels/cast_numeric_avx2_amd64.go | 2 +- .../kernels/cast_numeric_neon_arm64.go | 2 +- .../kernels/cast_numeric_sse4_amd64.go | 2 +- .../compute/internal/kernels/cast_temporal.go | 8 ++--- go/arrow/compute/internal/kernels/helpers.go | 14 ++++---- .../compute/internal/kernels/numeric_cast.go | 14 ++++---- go/arrow/compute/internal/kernels/rounding.go | 10 +++--- .../internal/kernels/scalar_arithmetic.go | 14 ++++---- .../internal/kernels/scalar_boolean.go | 6 ++-- .../kernels/scalar_comparison_amd64.go | 2 +- .../kernels/scalar_comparison_avx2_amd64.go | 2 +- .../kernels/scalar_comparison_noasm.go | 2 +- .../kernels/scalar_comparison_sse4_amd64.go | 2 +- .../internal/kernels/scalar_comparisons.go | 16 +++++----- .../compute/internal/kernels/string_casts.go | 12 +++---- go/arrow/compute/internal/kernels/types.go | 8 ++--- .../compute/internal/kernels/vector_hash.go | 14 ++++---- .../internal/kernels/vector_run_end_encode.go | 16 +++++----- .../internal/kernels/vector_selection.go | 14 ++++---- go/arrow/compute/registry.go | 2 +- go/arrow/compute/registry_test.go | 6 ++-- go/arrow/compute/scalar_bool.go | 6 ++-- go/arrow/compute/scalar_bool_test.go | 10 +++--- go/arrow/compute/scalar_compare.go | 6 ++-- go/arrow/compute/scalar_compare_test.go | 18 +++++------ go/arrow/compute/selection.go | 8 ++--- go/arrow/compute/utils.go | 14 ++++---- go/arrow/compute/vector_hash.go | 4 +-- go/arrow/compute/vector_hash_test.go | 12 +++---- go/arrow/compute/vector_run_end_test.go | 14 ++++---- go/arrow/compute/vector_run_ends.go | 4 +-- go/arrow/compute/vector_selection_test.go | 18 +++++------ go/arrow/csv/common.go | 4 +-- go/arrow/csv/reader.go | 14 ++++---- go/arrow/csv/reader_test.go | 14 ++++---- go/arrow/csv/transformer.go | 4 +-- go/arrow/csv/writer.go | 2 +- go/arrow/csv/writer_test.go | 16 +++++----- go/arrow/datatype.go | 2 +- go/arrow/datatype_binary_test.go | 2 +- go/arrow/datatype_extension_test.go | 4 +-- go/arrow/datatype_fixedwidth.go | 2 +- go/arrow/datatype_fixedwidth_test.go | 2 +- go/arrow/datatype_nested.go | 2 +- go/arrow/datatype_null_test.go | 2 +- go/arrow/datatype_viewheader.go | 6 ++-- go/arrow/datatype_viewheader_inline.go | 2 +- go/arrow/datatype_viewheader_inline_go1.19.go | 2 +- go/arrow/datatype_viewheader_inline_tinygo.go | 2 +- go/arrow/decimal128/decimal128.go | 2 +- go/arrow/decimal128/decimal128_test.go | 2 +- go/arrow/decimal256/decimal256.go | 4 +-- go/arrow/decimal256/decimal256_test.go | 2 +- go/arrow/doc.go | 2 +- go/arrow/encoded/ree_utils.go | 2 +- go/arrow/encoded/ree_utils_test.go | 8 ++--- go/arrow/endian/endian.go | 4 +-- go/arrow/example_test.go | 8 ++--- go/arrow/flight/basic_auth_flight_test.go | 2 +- go/arrow/flight/client.go | 2 +- go/arrow/flight/cookie_middleware_test.go | 2 +- go/arrow/flight/example_flight_server_test.go | 2 +- go/arrow/flight/flight_middleware_test.go | 4 +-- go/arrow/flight/flight_test.go | 10 +++--- go/arrow/flight/flightsql/client.go | 12 +++---- go/arrow/flight/flightsql/client_test.go | 12 +++---- go/arrow/flight/flightsql/column_metadata.go | 2 +- go/arrow/flight/flightsql/driver/README.md | 6 ++-- .../flight/flightsql/driver/config_test.go | 2 +- 
go/arrow/flight/flightsql/driver/driver.go | 10 +++--- .../flight/flightsql/driver/driver_test.go | 14 ++++---- go/arrow/flight/flightsql/driver/utils.go | 4 +-- .../flight/flightsql/driver/utils_test.go | 12 +++---- .../cmd/sqlite_flightsql_server/main.go | 6 ++-- .../flightsql/example/sql_batch_reader.go | 10 +++--- .../flight/flightsql/example/sqlite_info.go | 4 +-- .../flight/flightsql/example/sqlite_server.go | 14 ++++---- .../sqlite_tables_schema_batch_reader.go | 12 +++---- .../flight/flightsql/example/type_info.go | 8 ++--- .../flightsql/schema_ref/reference_schemas.go | 2 +- go/arrow/flight/flightsql/server.go | 16 +++++----- go/arrow/flight/flightsql/server_test.go | 14 ++++---- go/arrow/flight/flightsql/sql_info.go | 4 +-- .../flight/flightsql/sqlite_server_test.go | 16 +++++----- go/arrow/flight/flightsql/types.go | 2 +- go/arrow/flight/record_batch_reader.go | 14 ++++---- go/arrow/flight/record_batch_writer.go | 6 ++-- go/arrow/flight/server.go | 2 +- go/arrow/flight/server_example_test.go | 2 +- .../flight/session/example_session_test.go | 6 ++-- go/arrow/flight/session/session.go | 2 +- go/arrow/flight/session/stateful_session.go | 2 +- go/arrow/flight/session/stateless_session.go | 2 +- go/arrow/internal/arrdata/arrdata.go | 16 +++++----- go/arrow/internal/arrdata/ioutil.go | 10 +++--- go/arrow/internal/arrjson/arrjson.go | 20 ++++++------ go/arrow/internal/arrjson/arrjson_test.go | 6 ++-- go/arrow/internal/arrjson/option.go | 4 +-- go/arrow/internal/arrjson/reader.go | 10 +++--- go/arrow/internal/arrjson/writer.go | 10 +++--- .../internal/cdata_integration/entrypoints.go | 10 +++--- go/arrow/internal/dictutils/dict.go | 6 ++-- go/arrow/internal/dictutils/dict_test.go | 8 ++--- .../arrow-flight-integration-client/main.go | 2 +- .../arrow-flight-integration-server/main.go | 2 +- .../internal/flight_integration/scenario.go | 20 ++++++------ .../internal/testing/gen/random_array_gen.go | 10 +++--- go/arrow/internal/testing/tools/bits_test.go | 2 +- go/arrow/internal/testing/tools/data_types.go | 4 +-- go/arrow/internal/utils.go | 4 +-- go/arrow/ipc/cmd/arrow-cat/main.go | 4 +-- go/arrow/ipc/cmd/arrow-cat/main_test.go | 8 ++--- go/arrow/ipc/cmd/arrow-file-to-stream/main.go | 6 ++-- .../ipc/cmd/arrow-file-to-stream/main_test.go | 4 +-- .../cmd/arrow-json-integration-test/main.go | 12 +++---- .../arrow-json-integration-test/main_test.go | 4 +-- go/arrow/ipc/cmd/arrow-ls/main.go | 4 +-- go/arrow/ipc/cmd/arrow-ls/main_test.go | 8 ++--- go/arrow/ipc/cmd/arrow-stream-to-file/main.go | 6 ++-- .../ipc/cmd/arrow-stream-to-file/main_test.go | 4 +-- go/arrow/ipc/compression.go | 6 ++-- go/arrow/ipc/endian_swap.go | 6 ++-- go/arrow/ipc/endian_swap_test.go | 10 +++--- go/arrow/ipc/file_reader.go | 16 +++++----- go/arrow/ipc/file_test.go | 6 ++-- go/arrow/ipc/file_writer.go | 10 +++--- go/arrow/ipc/ipc.go | 8 ++--- go/arrow/ipc/ipc_test.go | 10 +++--- go/arrow/ipc/message.go | 6 ++-- go/arrow/ipc/message_test.go | 6 ++-- go/arrow/ipc/metadata.go | 10 +++--- go/arrow/ipc/metadata_test.go | 12 +++---- go/arrow/ipc/reader.go | 16 +++++----- go/arrow/ipc/reader_test.go | 6 ++-- go/arrow/ipc/stream_test.go | 6 ++-- go/arrow/ipc/writer.go | 18 +++++------ go/arrow/ipc/writer_test.go | 10 +++--- go/arrow/math/float64.go | 2 +- go/arrow/math/float64_avx2_amd64.go | 2 +- go/arrow/math/float64_neon_arm64.go | 2 +- go/arrow/math/float64_sse4_amd64.go | 2 +- go/arrow/math/float64_test.go | 6 ++-- go/arrow/math/int64.go | 2 +- go/arrow/math/int64_avx2_amd64.go | 2 +- go/arrow/math/int64_neon_arm64.go | 2 
+- go/arrow/math/int64_sse4_amd64.go | 2 +- go/arrow/math/int64_test.go | 6 ++-- go/arrow/math/type.go.tmpl | 2 +- go/arrow/math/type_simd_amd64.go.tmpl | 2 +- go/arrow/math/type_simd_arm64.go.tmpl | 2 +- go/arrow/math/type_test.go.tmpl | 6 ++-- go/arrow/math/uint64.go | 2 +- go/arrow/math/uint64_avx2_amd64.go | 2 +- go/arrow/math/uint64_neon_arm64.go | 2 +- go/arrow/math/uint64_sse4_amd64.go | 2 +- go/arrow/math/uint64_test.go | 6 ++-- go/arrow/memory/buffer.go | 2 +- go/arrow/memory/buffer_test.go | 2 +- go/arrow/memory/cgo_allocator.go | 2 +- go/arrow/memory/default_mallocator.go | 2 +- go/arrow/memory/default_mallocator_test.go | 4 +-- go/arrow/memory/mallocator/mallocator_test.go | 2 +- go/arrow/memory/memory_test.go | 2 +- go/arrow/record.go | 2 +- go/arrow/scalar/append.go | 10 +++--- go/arrow/scalar/append_test.go | 10 +++--- go/arrow/scalar/binary.go | 4 +-- go/arrow/scalar/compare.go | 2 +- go/arrow/scalar/nested.go | 8 ++--- go/arrow/scalar/numeric.gen.go | 6 ++-- go/arrow/scalar/numeric.gen_test.go | 4 +-- go/arrow/scalar/numeric.gen_test.go.tmpl | 4 +-- go/arrow/scalar/parse.go | 12 +++---- go/arrow/scalar/scalar.go | 20 ++++++------ go/arrow/scalar/scalar_test.go | 12 +++---- go/arrow/scalar/temporal.go | 2 +- go/arrow/schema.go | 2 +- go/arrow/schema_test.go | 2 +- go/arrow/table.go | 2 +- go/arrow/tensor/numeric.gen.go | 2 +- go/arrow/tensor/numeric.gen.go.tmpl | 4 +-- go/arrow/tensor/numeric.gen_test.go | 8 ++--- go/arrow/tensor/numeric.gen_test.go.tmpl | 8 ++--- go/arrow/tensor/tensor.go | 4 +-- go/arrow/tensor/tensor_test.go | 8 ++--- go/arrow/type_traits.go | 6 ++-- go/arrow/type_traits_boolean.go | 2 +- go/arrow/type_traits_decimal128.go | 4 +-- go/arrow/type_traits_decimal256.go | 4 +-- go/arrow/type_traits_float16.go | 4 +-- go/arrow/type_traits_interval.go | 4 +-- go/arrow/type_traits_numeric.gen.go | 2 +- go/arrow/type_traits_numeric.gen.go.tmpl | 2 +- go/arrow/type_traits_numeric.gen_test.go | 2 +- go/arrow/type_traits_numeric.gen_test.go.tmpl | 2 +- go/arrow/type_traits_test.go | 8 ++--- go/arrow/type_traits_timestamp.go | 2 +- go/arrow/type_traits_view.go | 2 +- go/arrow/util/byte_size.go | 6 ++-- go/arrow/util/byte_size_test.go | 8 ++--- go/arrow/util/protobuf_reflect.go | 6 ++-- go/arrow/util/protobuf_reflect_test.go | 8 ++--- go/go.mod | 2 +- go/internal/bitutils/bit_block_counter.go | 4 +-- .../bitutils/bit_block_counter_test.go | 6 ++-- go/internal/bitutils/bit_run_reader.go | 6 ++-- go/internal/bitutils/bit_run_reader_test.go | 6 ++-- go/internal/bitutils/bit_set_run_reader.go | 4 +-- .../bitutils/bit_set_run_reader_test.go | 6 ++-- go/internal/bitutils/bitmap_generate.go | 2 +- go/internal/bitutils/bitmap_generate_test.go | 2 +- go/internal/hashing/xxh3_memo_table.gen.go | 6 ++-- .../hashing/xxh3_memo_table.gen.go.tmpl | 4 +-- go/internal/types/extension_types.go | 6 ++-- go/internal/types/extension_types_test.go | 10 +++--- go/internal/utils/transpose_ints_def.go | 2 +- go/internal/utils/transpose_ints_test.go | 2 +- go/parquet/cmd/parquet_reader/dumper.go | 6 ++-- go/parquet/cmd/parquet_reader/main.go | 10 +++--- go/parquet/cmd/parquet_schema/main.go | 4 +-- go/parquet/compress/brotli.go | 2 +- go/parquet/compress/compress.go | 2 +- go/parquet/compress/compress_test.go | 2 +- go/parquet/compress/zstd.go | 2 +- go/parquet/doc.go | 6 ++-- go/parquet/encryption_properties.go | 2 +- go/parquet/encryption_properties_test.go | 4 +-- go/parquet/encryption_read_config_test.go | 8 ++--- go/parquet/encryption_write_config_test.go | 8 ++--- 
go/parquet/file/column_reader.go | 14 ++++---- go/parquet/file/column_reader_test.go | 12 +++---- go/parquet/file/column_reader_types.gen.go | 6 ++-- .../file/column_reader_types.gen.go.tmpl | 4 +-- go/parquet/file/column_writer.go | 16 +++++----- go/parquet/file/column_writer_test.go | 32 +++++++++---------- go/parquet/file/column_writer_types.gen.go | 16 +++++----- .../file/column_writer_types.gen.go.tmpl | 10 +++--- go/parquet/file/file_reader.go | 8 ++--- go/parquet/file/file_reader_mmap.go | 2 +- go/parquet/file/file_reader_mmap_windows.go | 2 +- go/parquet/file/file_reader_test.go | 20 ++++++------ go/parquet/file/file_writer.go | 10 +++--- go/parquet/file/file_writer_test.go | 14 ++++---- go/parquet/file/level_conversion.go | 10 +++--- go/parquet/file/level_conversion_test.go | 6 ++-- go/parquet/file/page_reader.go | 14 ++++---- go/parquet/file/page_writer.go | 18 +++++------ go/parquet/file/record_reader.go | 16 +++++----- go/parquet/file/row_group_reader.go | 8 ++--- go/parquet/file/row_group_writer.go | 8 ++--- go/parquet/file/row_group_writer_test.go | 8 ++--- go/parquet/internal/bmi/bmi_test.go | 2 +- .../internal/encoding/boolean_decoder.go | 8 ++--- .../internal/encoding/boolean_encoder.go | 8 ++--- .../internal/encoding/byte_array_decoder.go | 12 +++---- .../internal/encoding/byte_array_encoder.go | 10 +++--- .../internal/encoding/byte_stream_split.go | 8 ++--- go/parquet/internal/encoding/decoder.go | 20 ++++++------ .../internal/encoding/delta_bit_packing.go | 10 +++--- .../internal/encoding/delta_byte_array.go | 6 ++-- .../encoding/delta_byte_array_test.go | 4 +-- .../encoding/delta_length_byte_array.go | 6 ++-- go/parquet/internal/encoding/encoder.go | 16 +++++----- .../encoding/encoding_benchmarks_test.go | 16 +++++----- go/parquet/internal/encoding/encoding_test.go | 14 ++++---- .../encoding/fixed_len_byte_array_decoder.go | 4 +-- .../encoding/fixed_len_byte_array_encoder.go | 6 ++-- go/parquet/internal/encoding/levels.go | 10 +++--- go/parquet/internal/encoding/levels_test.go | 10 +++--- go/parquet/internal/encoding/memo_table.go | 10 +++--- .../internal/encoding/memo_table_test.go | 10 +++--- .../internal/encoding/memo_table_types.gen.go | 4 +-- .../encoding/memo_table_types.gen.go.tmpl | 2 +- .../encoding/plain_encoder_types.gen.go | 10 +++--- .../encoding/plain_encoder_types.gen.go.tmpl | 8 ++--- .../internal/encoding/typed_encoder.gen.go | 18 +++++------ .../encoding/typed_encoder.gen.go.tmpl | 14 ++++---- go/parquet/internal/encoding/types.go | 10 +++--- go/parquet/internal/encryption/aes.go | 2 +- go/parquet/internal/encryption/decryptor.go | 4 +-- go/parquet/internal/encryption/encryptor.go | 4 +-- go/parquet/internal/testutils/pagebuilder.go | 14 ++++---- .../internal/testutils/primitive_typed.go | 10 +++--- go/parquet/internal/testutils/random.go | 16 +++++----- go/parquet/internal/testutils/random_arrow.go | 8 ++--- go/parquet/internal/testutils/utils.go | 2 +- go/parquet/internal/thrift/helpers.go | 2 +- .../internal/utils/bit_benchmark_test.go | 6 ++-- .../internal/utils/bit_packing_arm64.go | 2 +- go/parquet/internal/utils/bit_reader.go | 8 ++--- go/parquet/internal/utils/bit_reader_test.go | 10 +++--- go/parquet/internal/utils/bit_writer.go | 2 +- go/parquet/internal/utils/bitmap_writer.go | 2 +- .../internal/utils/bitmap_writer_test.go | 4 +-- go/parquet/internal/utils/rle.go | 8 ++--- .../internal/utils/typed_rle_dict.gen.go | 6 ++-- .../internal/utils/typed_rle_dict.gen.go.tmpl | 6 ++-- go/parquet/metadata/app_version.go | 4 +-- 
go/parquet/metadata/column_chunk.go | 14 ++++---- go/parquet/metadata/file.go | 12 +++---- go/parquet/metadata/metadata_test.go | 6 ++-- go/parquet/metadata/row_group.go | 8 ++--- go/parquet/metadata/stat_compare_test.go | 4 +-- go/parquet/metadata/statistics.go | 18 +++++------ go/parquet/metadata/statistics_test.go | 12 +++---- go/parquet/metadata/statistics_types.gen.go | 18 +++++------ .../metadata/statistics_types.gen.go.tmpl | 14 ++++---- go/parquet/pqarrow/column_readers.go | 20 ++++++------ go/parquet/pqarrow/encode_arrow.go | 20 ++++++------ go/parquet/pqarrow/encode_arrow_test.go | 32 +++++++++---------- go/parquet/pqarrow/encode_dict_compute.go | 16 +++++----- go/parquet/pqarrow/encode_dict_nocompute.go | 4 +-- go/parquet/pqarrow/encode_dictionary_test.go | 16 +++++----- go/parquet/pqarrow/file_reader.go | 14 ++++---- go/parquet/pqarrow/file_reader_test.go | 16 +++++----- go/parquet/pqarrow/file_writer.go | 12 +++---- go/parquet/pqarrow/file_writer_test.go | 10 +++--- go/parquet/pqarrow/helpers.go | 2 +- go/parquet/pqarrow/path_builder.go | 12 +++---- go/parquet/pqarrow/path_builder_test.go | 8 ++--- go/parquet/pqarrow/properties.go | 6 ++-- go/parquet/pqarrow/reader_writer_test.go | 12 +++---- go/parquet/pqarrow/schema.go | 18 +++++------ go/parquet/pqarrow/schema_test.go | 18 +++++------ go/parquet/reader_properties.go | 4 +-- go/parquet/reader_writer_properties_test.go | 6 ++-- go/parquet/schema/column.go | 4 +-- go/parquet/schema/converted_types.go | 2 +- go/parquet/schema/converted_types_test.go | 2 +- go/parquet/schema/helpers.go | 2 +- go/parquet/schema/helpers_test.go | 4 +-- go/parquet/schema/logical_types.go | 8 ++--- go/parquet/schema/logical_types_test.go | 6 ++-- go/parquet/schema/node.go | 4 +-- go/parquet/schema/reflection.go | 8 ++--- go/parquet/schema/reflection_test.go | 6 ++-- go/parquet/schema/schema.go | 4 +-- go/parquet/schema/schema_element_test.go | 4 +-- go/parquet/schema/schema_flatten_test.go | 4 +-- go/parquet/schema/schema_test.go | 6 ++-- go/parquet/types.go | 4 +-- go/parquet/writer_properties.go | 6 ++-- java/adapter/avro/pom.xml | 2 +- java/adapter/jdbc/pom.xml | 2 +- java/adapter/orc/pom.xml | 2 +- java/algorithm/pom.xml | 2 +- java/bom/pom.xml | 2 +- java/c/pom.xml | 2 +- java/compression/pom.xml | 2 +- java/dataset/pom.xml | 2 +- java/flight/flight-core/pom.xml | 2 +- java/flight/flight-integration-tests/pom.xml | 2 +- java/flight/flight-sql-jdbc-core/pom.xml | 2 +- java/flight/flight-sql-jdbc-driver/pom.xml | 2 +- java/flight/flight-sql/pom.xml | 2 +- java/flight/pom.xml | 2 +- java/format/pom.xml | 2 +- java/gandiva/pom.xml | 2 +- .../module-info-compiler-maven-plugin/pom.xml | 2 +- java/maven/pom.xml | 2 +- java/memory/memory-core/pom.xml | 2 +- java/memory/memory-netty-buffer-patch/pom.xml | 2 +- java/memory/memory-netty/pom.xml | 2 +- java/memory/memory-unsafe/pom.xml | 2 +- java/memory/pom.xml | 2 +- java/performance/pom.xml | 2 +- java/pom.xml | 2 +- java/tools/pom.xml | 2 +- java/vector/pom.xml | 2 +- js/package.json | 2 +- matlab/CMakeLists.txt | 2 +- python/CMakeLists.txt | 2 +- python/pyproject.toml | 2 +- r/DESCRIPTION | 2 +- r/NEWS.md | 4 ++- r/_pkgdown.yml | 2 +- r/pkgdown/assets/versions.json | 8 +++-- ruby/red-arrow-cuda/lib/arrow-cuda/version.rb | 2 +- .../lib/arrow-dataset/version.rb | 2 +- .../lib/arrow-flight-sql/version.rb | 2 +- .../lib/arrow-flight/version.rb | 2 +- ruby/red-arrow/lib/arrow/version.rb | 2 +- ruby/red-gandiva/lib/gandiva/version.rb | 2 +- ruby/red-parquet/lib/parquet/version.rb | 2 +- 534 files changed, 
1831 insertions(+), 1819 deletions(-) diff --git a/c_glib/meson.build b/c_glib/meson.build index 06aa5b941e77c..214c57747033e 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -35,7 +35,7 @@ project('arrow-glib', 'c', 'cpp', # * 22.04: 0.61.2 meson_version: '>=0.53.2') -version = '17.0.0-SNAPSHOT' +version = '18.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py index 7422432251ff1..ba8cb03d15a3e 100755 --- a/c_glib/tool/generate-version-header.py +++ b/c_glib/tool/generate-version-header.py @@ -140,6 +140,7 @@ def generate_availability_macros(library: str) -> str: ALL_VERSIONS = [ + (18, 0), (17, 0), (16, 0), (15, 0), diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index e88d2b8fe30d5..3941edbfec527 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "17.0.0-SNAPSHOT", + "version-string": "18.0.0-SNAPSHOT", "dependencies": [ "glib", "gobject-introspection", diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index e12099f2b405d..ed68faae950b1 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=16.1.0.9000 +pkgver=17.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8247043b8bf84..a1e3138da9e0b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -79,7 +79,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "17.0.0-SNAPSHOT") +set(ARROW_VERSION "18.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index f087a6d24c8f9..6f825b55cfd94 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "17.0.0-SNAPSHOT", + "version-string": "18.0.0-SNAPSHOT", "dependencies": [ "abseil", { diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 3c06d3cd31d90..43c93238d6a7b 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2024 The Apache Software Foundation The Apache Software Foundation - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index d8f5ca872dbec..6bcae64adb92f 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-17.0.0-SNAPSHOT/apache-arrow-17.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index e17d524bd9d71..955dfa0ea9fa4 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ 
b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-17.0.0-SNAPSHOT/apache-arrow-17.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-18.0.0-SNAPSHOT/apache-arrow-18.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index e879fc69138d0..6a684b56d57b5 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "17.0 (dev)", + "name": "18.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "16.1 (stable)", + "name": "17.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "16.1", + "version": "16.1/", + "url": "https://arrow.apache.org/docs/16.1/" + }, { "name": "16.0", "version": "16.0/", diff --git a/docs/source/index.rst b/docs/source/index.rst index 0afe52758af25..6f38ab668d883 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,7 +104,7 @@ Implementations C/GLib C++ C# - Go <https://pkg.go.dev/github.com/apache/arrow/go/v17> + Go <https://pkg.go.dev/github.com/apache/arrow/go/v18> Java JavaScript Julia diff --git a/go/README.md b/go/README.md index 220b0a230a615..51ac06c87f171 100644 --- a/go/README.md +++ b/go/README.md @@ -20,7 +20,7 @@ Apache Arrow for Go =================== -[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v17.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v17) +[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v18.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v18) [Apache Arrow][arrow] is a cross-language development platform for in-memory data.
It specifies a standardized language-independent columnar memory format diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go index f4348d6e66771..7f932801917a4 100644 --- a/go/arrow/_examples/helloworld/main.go +++ b/go/arrow/_examples/helloworld/main.go @@ -19,10 +19,10 @@ package main import ( "os" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go index 1f83a1b905ae9..33cb1686981f4 100644 --- a/go/arrow/_tools/tmpl/main.go +++ b/go/arrow/_tools/tmpl/main.go @@ -28,7 +28,7 @@ import ( "strings" "text/template" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/internal/json" ) const Ext = ".tmpl" diff --git a/go/arrow/array.go b/go/arrow/array.go index 7f04eab71e5f4..768b30f8e0690 100644 --- a/go/arrow/array.go +++ b/go/arrow/array.go @@ -19,8 +19,8 @@ package arrow import ( "fmt" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // ArrayData is the underlying memory and metadata of an Arrow array, corresponding diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index be6ba864d1aa7..ae33ca5417db0 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -19,9 +19,9 @@ package array import ( "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go index 9beadc3cdc654..4d83766b4fa3e 100644 --- a/go/arrow/array/array_test.go +++ b/go/arrow/array/array_test.go @@ -19,11 +19,11 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index d36c0aac7d3f2..99764270bf39d 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -23,9 +23,9 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type BinaryLike interface { diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index c8d793ef0670e..919fff7b5e5e8 100644 --- a/go/arrow/array/binary_test.go +++ 
b/go/arrow/array/binary_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go index e5b4dd63cf9a5..6fcc4eaf46479 100644 --- a/go/arrow/array/binarybuilder.go +++ b/go/arrow/array/binarybuilder.go @@ -25,10 +25,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A BinaryBuilder is used to build a Binary array using the Append methods. diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go index e37c5f624f9cc..65d5c7385df4c 100644 --- a/go/arrow/array/binarybuilder_test.go +++ b/go/arrow/array/binarybuilder_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 50a4101db18b3..eab26d273dd96 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -21,10 +21,10 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of boolean values. 
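Because Go uses semantic import versioning, a new major version means a new import path, so every file that imports the module has to change; the v17 -> v18 hunks above and below are purely mechanical. As a minimal sketch of the same bump seen from a downstream consumer (the consumer module and program are hypothetical; only the /v18 path segment is taken from this patch):

// Hypothetical consumer's go.mod after the bump:
//
//	module example.com/consumer
//
//	require github.com/apache/arrow/go/v18 v18.0.0
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"        // was github.com/apache/arrow/go/v17/arrow
	"github.com/apache/arrow/go/v18/arrow/memory" // was github.com/apache/arrow/go/v17/arrow/memory
)

func main() {
	// The API itself is untouched by the bump; only the import path changes.
	mem := memory.NewGoAllocator()
	buf := mem.Allocate(64)
	defer mem.Free(buf)
	fmt.Println(arrow.PrimitiveTypes.Int64, len(buf))
}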
diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go index c3bc235de3f21..f980497d54521 100644 --- a/go/arrow/array/boolean_test.go +++ b/go/arrow/array/boolean_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go index d58ff80151c06..44d33018f94ea 100644 --- a/go/arrow/array/booleanbuilder.go +++ b/go/arrow/array/booleanbuilder.go @@ -23,11 +23,11 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type BooleanBuilder struct { diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go index e01f6660c4c10..42e49f95a2f3e 100644 --- a/go/arrow/array/booleanbuilder_test.go +++ b/go/arrow/array/booleanbuilder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go index b9638b311584d..037d220f0b141 100644 --- a/go/arrow/array/bufferbuilder.go +++ b/go/arrow/array/bufferbuilder.go @@ -20,10 +20,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) type bufBuilder interface { diff --git a/go/arrow/array/bufferbuilder_byte.go b/go/arrow/array/bufferbuilder_byte.go index dd39d6b835cc3..2ac7ec703b579 100644 --- a/go/arrow/array/bufferbuilder_byte.go +++ b/go/arrow/array/bufferbuilder_byte.go @@ -16,7 +16,7 @@ package array -import "github.com/apache/arrow/go/v17/arrow/memory" +import "github.com/apache/arrow/go/v18/arrow/memory" type byteBufferBuilder struct { bufferBuilder diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go b/go/arrow/array/bufferbuilder_numeric.gen.go index 19388e27b0ee3..5215ecf65a312 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go +++ b/go/arrow/array/bufferbuilder_numeric.gen.go @@ -19,9 +19,9 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" ) type int64BufferBuilder struct { diff --git 
a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl index 40c96b5987cf3..2b7fcaefcdeb2 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl +++ b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl @@ -17,9 +17,9 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" ) {{range .In}} diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go index 372ba6976269d..3c947c87eeaac 100644 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ b/go/arrow/array/bufferbuilder_numeric_test.go @@ -20,8 +20,8 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 88c0ac479868b..6c8ea877a2fb0 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -20,10 +20,10 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) const ( diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go index d508d4626d4e9..7eb2b3f7cf9e3 100644 --- a/go/arrow/array/builder_test.go +++ b/go/arrow/array/builder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go index 7c6f3cab7a7a7..a54c1e23c1e1c 100644 --- a/go/arrow/array/compare.go +++ b/go/arrow/array/compare.go @@ -20,9 +20,9 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/bitutils" ) // RecordEqual reports whether the two provided records are equal. 
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go index 223535bf64000..f757ab9f25f07 100644 --- a/go/arrow/array/compare_test.go +++ b/go/arrow/array/compare_test.go @@ -22,11 +22,11 @@ import ( "sort" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index 184f7143ee5df..3d2b4b4b83167 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -23,13 +23,13 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" ) // Concatenate creates a new arrow.Array which is the concatenation of the diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go index bfde89ec6600d..7e6a3c08efd5c 100644 --- a/go/arrow/array/concat_test.go +++ b/go/arrow/array/concat_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go index 40fad0dfd0033..19513ebaacf50 100644 --- a/go/arrow/array/data.go +++ b/go/arrow/array/data.go @@ -22,9 +22,9 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // Data represents the memory and metadata of an Arrow array. 
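The data.go hunk above ends at the doc comment for Data ("the memory and metadata of an Arrow array"). For orientation only, and assuming the v18 API is unchanged from v17 apart from the import path, here is a sketch of how a Data value backs a concrete array built from a raw buffer:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v18/arrow"
	"github.com/apache/arrow/go/v18/arrow/array"
	"github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
	// Reinterpret three int64 values as the array's value buffer.
	values := []int64{1, 2, 3}
	buf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(values))

	// Buffer 0 is the validity bitmap (nil: no nulls); buffer 1 holds the values.
	data := array.NewData(arrow.PrimitiveTypes.Int64, len(values),
		[]*memory.Buffer{nil, buf}, nil, 0, 0)
	defer data.Release()

	arr := array.MakeFromData(data)
	defer arr.Release()
	fmt.Println(arr) // [1 2 3]
}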
diff --git a/go/arrow/array/data_test.go b/go/arrow/array/data_test.go index 68f2ada97b037..2cfc64fbe2d7e 100644 --- a/go/arrow/array/data_test.go +++ b/go/arrow/array/data_test.go @@ -20,8 +20,8 @@ import ( "slices" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go index 4578fd4061dc4..fd9e53f7f4c06 100644 --- a/go/arrow/array/decimal128.go +++ b/go/arrow/array/decimal128.go @@ -24,12 +24,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of 128-bit decimal values. diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go index 3e2d021924723..707a4f1a6c8d5 100644 --- a/go/arrow/array/decimal128_test.go +++ b/go/arrow/array/decimal128_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index bc082638b7bc4..6431306f969c3 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -24,12 +24,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go index dd671b53bf101..8adb810165430 100644 --- a/go/arrow/array/decimal256_test.go +++ b/go/arrow/array/decimal256_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go index 414bb3426ca34..b321bd7fbbe7b 100644 --- a/go/arrow/array/decimal_test.go +++ b/go/arrow/array/decimal_test.go @@ -21,12 +21,12 @@ import ( "math/big" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 6e202deeb37cd..ca7fed5257085 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -25,16 +25,16 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/utils" ) // Dictionary represents the type for dictionary-encoded data with a data diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 128182d005eba..ea9587d8dcdf9 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -24,13 +24,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/array/diff.go b/go/arrow/array/diff.go index 7110804d1b2cb..e5c1ce1521d95 100644 --- a/go/arrow/array/diff.go +++ b/go/arrow/array/diff.go @@ -20,7 +20,7 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Edit represents one entry in the edit script to compare two arrays. diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go index 67fa1d04ab506..65d212be11838 100644 --- a/go/arrow/array/diff_test.go +++ b/go/arrow/array/diff_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/types" ) type diffTestCase struct { diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index 7df66061b5eb7..748c4c1fec641 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -23,12 +23,12 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/utils" ) // RunEndEncoded represents an array containing two children: diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go index 1c54e56aaea6f..03352ec44177c 100644 --- a/go/arrow/array/encoded_test.go +++ b/go/arrow/array/encoded_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go index 5df86c8964a7b..8c4ef840cb72c 100644 --- a/go/arrow/array/extension.go +++ b/go/arrow/array/extension.go @@ -20,9 +20,9 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // ExtensionArray is the interface that 
needs to be implemented to handle diff --git a/go/arrow/array/extension_test.go b/go/arrow/array/extension_test.go index 4520117f857f8..71ea9f105af7c 100644 --- a/go/arrow/array/extension_test.go +++ b/go/arrow/array/extension_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go index 830361b518acd..a0eefd460c2bf 100644 --- a/go/arrow/array/fixed_size_list.go +++ b/go/arrow/array/fixed_size_list.go @@ -22,11 +22,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // FixedSizeList represents an immutable sequence of N array values. diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go index ff8c5fe552e4c..e0edb9868cffd 100644 --- a/go/arrow/array/fixed_size_list_test.go +++ b/go/arrow/array/fixed_size_list_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go index 535b2d51003a7..f4d16c6386d60 100644 --- a/go/arrow/array/fixedsize_binary.go +++ b/go/arrow/array/fixedsize_binary.go @@ -22,8 +22,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of fixed-length binary strings. 
diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go index 927818a640a3a..4a32cb9692a06 100644 --- a/go/arrow/array/fixedsize_binary_test.go +++ b/go/arrow/array/fixedsize_binary_test.go @@ -21,9 +21,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFixedSizeBinary(t *testing.T) { diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go index 08e8ae841b9e4..96d58632ab8c8 100644 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ b/go/arrow/array/fixedsize_binarybuilder.go @@ -23,10 +23,10 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go index 3bd8611f37230..0c58c65ecb02e 100644 --- a/go/arrow/array/fixedsize_binarybuilder_test.go +++ b/go/arrow/array/fixedsize_binarybuilder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go index b71f23b22c5e6..757b658a9150d 100644 --- a/go/arrow/array/float16.go +++ b/go/arrow/array/float16.go @@ -20,9 +20,9 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of Float16 values. 
diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go index 9eeb22feca43e..7543f2b6f96dd 100644 --- a/go/arrow/array/float16_builder.go +++ b/go/arrow/array/float16_builder.go @@ -23,12 +23,12 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Float16Builder struct { diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go index 47d84302a9008..ab25e544ed833 100644 --- a/go/arrow/array/float16_builder_test.go +++ b/go/arrow/array/float16_builder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go index 2cabfad18a84a..66c6eca21bca5 100644 --- a/go/arrow/array/interval.go +++ b/go/arrow/array/interval.go @@ -23,11 +23,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) func NewIntervalData(data arrow.ArrayData) arrow.Array { diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go index c10112caaa5f1..6d36885a627d9 100644 --- a/go/arrow/array/interval_test.go +++ b/go/arrow/array/interval_test.go @@ -20,9 +20,9 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go index 4f5ebea6b3a25..2944151a5f63c 100644 --- a/go/arrow/array/json_reader.go +++ b/go/arrow/array/json_reader.go @@ -22,10 +22,10 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Option func(config) diff --git a/go/arrow/array/json_reader_test.go b/go/arrow/array/json_reader_test.go index 
30a6d5833ee69..5e258dfdc07b1 100644 --- a/go/arrow/array/json_reader_test.go +++ b/go/arrow/array/json_reader_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 16b7cf1bc05a9..1e2191f2cfc3a 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -22,11 +22,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type ListLike interface { diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go index 5b624dccfc91a..f6f42a31299e4 100644 --- a/go/arrow/array/list_test.go +++ b/go/arrow/array/list_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index eb11f963c5837..a692c2cd6d71a 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -20,9 +20,9 @@ import ( "bytes" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Map represents an immutable sequence of Key/Value structs. It is a diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go index cbea072e09045..e73508e6afe11 100644 --- a/go/arrow/array/map_test.go +++ b/go/arrow/array/map_test.go @@ -20,9 +20,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go index 365964ebb0a2f..6dccd3af59f2a 100644 --- a/go/arrow/array/null.go +++ b/go/arrow/array/null.go @@ -23,10 +23,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Null represents an immutable, degenerate array with no physical storage. 
diff --git a/go/arrow/array/null_test.go b/go/arrow/array/null_test.go index ae645f9564220..61ccb472b1f7b 100644 --- a/go/arrow/array/null_test.go +++ b/go/arrow/array/null_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index b962cda40b8b3..413a356c2a8ab 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -24,8 +24,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) // A type which represents an immutable sequence of int64 values. diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index d9daa55849619..1f4b56609f464 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -21,8 +21,8 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/json" ) {{range .In}} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index 3013d45acbb2b..bb8acc3f41519 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -21,11 +21,11 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index 227fcb2f68e83..c80f0c7c9578e 100644 --- a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -27,11 +27,11 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Int64Builder struct { diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index c701c49c4210d..d8b92cf60cc39 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -17,11 +17,11 @@ package array import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + 
"github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) {{range .In}} diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index b43aa7f807090..8adf86853b7c7 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -22,9 +22,9 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl index 9f381aa055fac..f3cd08a63745d 100644 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ b/go/arrow/array/numericbuilder.gen_test.go.tmpl @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index 93543f929ed7f..2735f1baa9a30 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -22,10 +22,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // RecordReader reads a stream of records. 
diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go index 6712a1c9085be..8e6dc3b06d25e 100644 --- a/go/arrow/array/record_test.go +++ b/go/arrow/array/record_test.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index f4735f2cb5e0e..88b4568ad5e84 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -23,9 +23,9 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type StringLike interface { diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go index e9d9e061b4c52..efbe51edd1a03 100644 --- a/go/arrow/array/string_test.go +++ b/go/arrow/array/string_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index 1921ee86dd894..279ac1d87b25b 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -23,11 +23,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Struct represents an ordered sequence of relative types. 
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go index 03190585fadf0..4338bbd0b136e 100644 --- a/go/arrow/array/struct_test.go +++ b/go/arrow/array/struct_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go index 54299ba6b1922..3b742ae78803d 100644 --- a/go/arrow/array/table.go +++ b/go/arrow/array/table.go @@ -23,8 +23,8 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // NewColumnSlice returns a new zero-copy slice of the column with the indicated diff --git a/go/arrow/array/table_test.go b/go/arrow/array/table_test.go index 2b9aaa7f352bf..e8357ac3dfb69 100644 --- a/go/arrow/array/table_test.go +++ b/go/arrow/array/table_test.go @@ -22,9 +22,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestChunked(t *testing.T) { diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go index dfd62ad68e568..679d9a5a8a4cc 100644 --- a/go/arrow/array/timestamp.go +++ b/go/arrow/array/timestamp.go @@ -24,11 +24,11 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) // Timestamp represents an immutable sequence of arrow.Timestamp values. 
diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go index 38ab9279f6785..cb9f957d3f255 100644 --- a/go/arrow/array/timestamp_test.go +++ b/go/arrow/array/timestamp_test.go @@ -20,9 +20,9 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go index 10dc560348dae..5d2a8b8ecb2f0 100644 --- a/go/arrow/array/union.go +++ b/go/arrow/array/union.go @@ -25,12 +25,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/json" ) // Union is a convenience interface to encompass both Sparse and Dense diff --git a/go/arrow/array/union_test.go b/go/arrow/array/union_test.go index d848340a96070..43e7afd693b6c 100644 --- a/go/arrow/array/union_test.go +++ b/go/arrow/array/union_test.go @@ -21,9 +21,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go index e82eb24679bc4..2b41dadaf4bfc 100644 --- a/go/arrow/array/util.go +++ b/go/arrow/array/util.go @@ -22,11 +22,11 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/internal/json" ) func min(a, b int) int { diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go index 2f395b03ffbbb..114ea6e546649 100644 --- a/go/arrow/array/util_test.go +++ b/go/arrow/array/util_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" 
+ "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/arrio/arrio.go b/go/arrow/arrio/arrio.go index 74bebd7ebe691..53215c81f75eb 100644 --- a/go/arrow/arrio/arrio.go +++ b/go/arrow/arrio/arrio.go @@ -22,7 +22,7 @@ import ( "errors" "io" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Reader is the interface that wraps the Read method. diff --git a/go/arrow/arrio/arrio_test.go b/go/arrow/arrio/arrio_test.go index 09d47c50c8e23..26863ec252bf7 100644 --- a/go/arrow/arrio/arrio_test.go +++ b/go/arrow/arrio/arrio_test.go @@ -22,11 +22,11 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) type copyKind int diff --git a/go/arrow/avro/avro2parquet/main.go b/go/arrow/avro/avro2parquet/main.go index 6a74940ffe2c1..ae514c5ed1fda 100644 --- a/go/arrow/avro/avro2parquet/main.go +++ b/go/arrow/avro/avro2parquet/main.go @@ -26,10 +26,10 @@ import ( "runtime/pprof" "time" - "github.com/apache/arrow/go/v17/arrow/avro" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - pq "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow/avro" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + pq "github.com/apache/arrow/go/v18/parquet/pqarrow" ) var ( diff --git a/go/arrow/avro/reader.go b/go/arrow/avro/reader.go index e4a3aefd96da1..1463041499de2 100644 --- a/go/arrow/avro/reader.go +++ b/go/arrow/avro/reader.go @@ -23,10 +23,10 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/hamba/avro/v2/ocf" "github.com/tidwall/sjson" diff --git a/go/arrow/avro/reader_test.go b/go/arrow/avro/reader_test.go index 3a02c995970a2..2cb1a7caa801c 100644 --- a/go/arrow/avro/reader_test.go +++ b/go/arrow/avro/reader_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" hamba "github.com/hamba/avro/v2" ) diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go index 3c3ebd3147053..e07cd380d511f 100644 --- a/go/arrow/avro/reader_types.go +++ b/go/arrow/avro/reader_types.go @@ -23,12 +23,12 @@ import ( "fmt" "math/big" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" ) type dataLoader struct { diff --git a/go/arrow/avro/schema.go b/go/arrow/avro/schema.go index 6e2ac00124df9..007dad06c19cd 100644 --- a/go/arrow/avro/schema.go +++ b/go/arrow/avro/schema.go @@ -22,10 +22,10 @@ import ( "math" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/internal/utils" avro "github.com/hamba/avro/v2" ) diff --git a/go/arrow/avro/schema_test.go b/go/arrow/avro/schema_test.go index ee4525707aadf..395abcb694d84 100644 --- a/go/arrow/avro/schema_test.go +++ b/go/arrow/avro/schema_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" hamba "github.com/hamba/avro/v2" ) diff --git a/go/arrow/bitutil/bitmaps.go b/go/arrow/bitutil/bitmaps.go index eb3210043537b..fb4fcd597b804 100644 --- a/go/arrow/bitutil/bitmaps.go +++ b/go/arrow/bitutil/bitmaps.go @@ -22,9 +22,9 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // BitmapReader is a simple bitmap reader for a byte slice. 
diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go index 8d6bfdd1dde28..726bfa050cc4b 100644 --- a/go/arrow/bitutil/bitmaps_test.go +++ b/go/arrow/bitutil/bitmaps_test.go @@ -22,8 +22,8 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go index a6497196fe15a..c4b633c73aa40 100644 --- a/go/arrow/bitutil/bitutil.go +++ b/go/arrow/bitutil/bitutil.go @@ -21,7 +21,7 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" ) var ( diff --git a/go/arrow/bitutil/bitutil_test.go b/go/arrow/bitutil/bitutil_test.go index fc5be55113b99..c03bf5268a5ff 100644 --- a/go/arrow/bitutil/bitutil_test.go +++ b/go/arrow/bitutil/bitutil_test.go @@ -21,8 +21,8 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 157df47adedc8..0562eaed0fb7a 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -50,10 +50,10 @@ import ( "syscall" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index b971cb21de1b9..59775926d7ef8 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -44,11 +44,11 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/ipc" ) func encodeCMetadata(keys, values []string) []byte { diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index a200e2db45145..3563aeb5f0f1e 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -35,12 +35,12 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" + 
"github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index caa1208a20ae5..968b28b4e4afb 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -69,10 +69,10 @@ import ( "runtime/cgo" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" ) const ( diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go index 40f5fb2023558..6dbcde831d889 100644 --- a/go/arrow/cdata/exports.go +++ b/go/arrow/cdata/exports.go @@ -20,8 +20,8 @@ import ( "runtime/cgo" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) // #include diff --git a/go/arrow/cdata/import_allocator.go b/go/arrow/cdata/import_allocator.go index ad107902eb8ba..4e5c2a7b38c72 100644 --- a/go/arrow/cdata/import_allocator.go +++ b/go/arrow/cdata/import_allocator.go @@ -20,7 +20,7 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // #include "arrow/c/helpers.h" diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index ba821896168e2..005dda73ff0ec 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -22,10 +22,10 @@ package cdata import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go index 83ce27ece0d9f..5315853fc59ca 100644 --- a/go/arrow/cdata/test/test_cimport.go +++ b/go/arrow/cdata/test/test_cimport.go @@ -23,10 +23,10 @@ import ( "fmt" "runtime" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/cdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/cdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) // #include diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go index e152413cd3eed..51ca027d53375 100644 --- a/go/arrow/compute/arithmetic.go +++ b/go/arrow/compute/arithmetic.go @@ -22,12 +22,12 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + 
"github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/scalar" ) type ( diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go index 307fa4389544b..6e693481a322c 100644 --- a/go/arrow/compute/arithmetic_test.go +++ b/go/arrow/compute/arithmetic_test.go @@ -26,16 +26,16 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/klauspost/cpuid/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/arrow/compute/cast.go b/go/arrow/compute/cast.go index dc2a6d022b2a1..6ef6fdddd16ff 100644 --- a/go/arrow/compute/cast.go +++ b/go/arrow/compute/cast.go @@ -23,11 +23,11 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go index 11b9587e9bdca..2e748a2fee9c2 100644 --- a/go/arrow/compute/cast_test.go +++ b/go/arrow/compute/cast_test.go @@ -26,16 +26,16 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + 
"github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/datum.go b/go/arrow/compute/datum.go index 40d9aa5db8601..9619fe09610de 100644 --- a/go/arrow/compute/datum.go +++ b/go/arrow/compute/datum.go @@ -21,9 +21,9 @@ package compute import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/scalar" ) //go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment diff --git a/go/arrow/compute/example_test.go b/go/arrow/compute/example_test.go index 8d157aa40ac25..d427fb622d24a 100644 --- a/go/arrow/compute/example_test.go +++ b/go/arrow/compute/example_test.go @@ -23,11 +23,11 @@ import ( "fmt" "log" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" ) // This example demonstrates how to register a custom scalar function. diff --git a/go/arrow/compute/exec.go b/go/arrow/compute/exec.go index e3e3fc0e5b887..1142297c1c396 100644 --- a/go/arrow/compute/exec.go +++ b/go/arrow/compute/exec.go @@ -22,9 +22,9 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func haveChunkedArray(values []Datum) bool { diff --git a/go/arrow/compute/exec/kernel.go b/go/arrow/compute/exec/kernel.go index 657f18cb87373..600e52c681686 100644 --- a/go/arrow/compute/exec/kernel.go +++ b/go/arrow/compute/exec/kernel.go @@ -24,10 +24,10 @@ import ( "hash/maphash" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/exec/kernel_test.go b/go/arrow/compute/exec/kernel_test.go index 13beb62d3d704..248bad323a307 100644 --- a/go/arrow/compute/exec/kernel_test.go +++ b/go/arrow/compute/exec/kernel_test.go @@ -22,12 +22,12 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" + 
"github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec/span.go b/go/arrow/compute/exec/span.go index d62306e663882..6156acfd008aa 100644 --- a/go/arrow/compute/exec/span.go +++ b/go/arrow/compute/exec/span.go @@ -22,11 +22,11 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // BufferSpan is a lightweight Buffer holder for ArraySpans that does not diff --git a/go/arrow/compute/exec/span_test.go b/go/arrow/compute/exec/span_test.go index 79452965b8f81..f5beb45ee1494 100644 --- a/go/arrow/compute/exec/span_test.go +++ b/go/arrow/compute/exec/span_test.go @@ -24,14 +24,14 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go index 88390eef9cdef..832f93f13165d 100644 --- a/go/arrow/compute/exec/utils.go +++ b/go/arrow/compute/exec/utils.go @@ -24,10 +24,10 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/constraints" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go index 82b2545b5ce61..b8b7212b538c5 100644 --- a/go/arrow/compute/exec/utils_test.go +++ b/go/arrow/compute/exec/utils_test.go @@ -21,10 +21,10 @@ package exec_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exec_internals_test.go b/go/arrow/compute/exec_internals_test.go index 34f14e69d06d4..f0c585f557ebc 100644 --- a/go/arrow/compute/exec_internals_test.go +++ b/go/arrow/compute/exec_internals_test.go @@ -24,13 +24,13 @@ 
import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/exec_test.go b/go/arrow/compute/exec_test.go index 40b5e55ecb8d2..27f6676f3187c 100644 --- a/go/arrow/compute/exec_test.go +++ b/go/arrow/compute/exec_test.go @@ -22,12 +22,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go index 3e605db305013..1d197e4220ab2 100644 --- a/go/arrow/compute/executor.go +++ b/go/arrow/compute/executor.go @@ -25,14 +25,14 @@ import ( "runtime" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // ExecCtx holds simple contextual information for execution diff --git a/go/arrow/compute/expression.go b/go/arrow/compute/expression.go index 8dde6927a7c02..f6aadeda5634b 100644 --- a/go/arrow/compute/expression.go +++ b/go/arrow/compute/expression.go @@ -28,14 +28,14 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + 
"github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" ) var hashSeed = maphash.MakeSeed() diff --git a/go/arrow/compute/expression_test.go b/go/arrow/compute/expression_test.go index 4f3188ea82d9f..1898bb3dc92b2 100644 --- a/go/arrow/compute/expression_test.go +++ b/go/arrow/compute/expression_test.go @@ -22,11 +22,11 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/exprs/builders.go b/go/arrow/compute/exprs/builders.go index 525aa2ade3fe5..a3af8dd6f287d 100644 --- a/go/arrow/compute/exprs/builders.go +++ b/go/arrow/compute/exprs/builders.go @@ -25,8 +25,8 @@ import ( "strings" "unicode" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff --git a/go/arrow/compute/exprs/builders_test.go b/go/arrow/compute/exprs/builders_test.go index 167a4378f9dfa..21ad3bd642030 100644 --- a/go/arrow/compute/exprs/builders_test.go +++ b/go/arrow/compute/exprs/builders_test.go @@ -21,8 +21,8 @@ package exprs_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exprs" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exprs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/substrait-io/substrait-go/expr" diff --git a/go/arrow/compute/exprs/exec.go b/go/arrow/compute/exprs/exec.go index 28c360a2e7dec..850acbb3cd492 100644 --- a/go/arrow/compute/exprs/exec.go +++ b/go/arrow/compute/exprs/exec.go @@ -23,15 +23,15 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff --git 
a/go/arrow/compute/exprs/exec_internal_test.go b/go/arrow/compute/exprs/exec_internal_test.go index e38ebb9fb8bd7..450db139e9357 100644 --- a/go/arrow/compute/exprs/exec_internal_test.go +++ b/go/arrow/compute/exprs/exec_internal_test.go @@ -23,10 +23,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/exprs/exec_test.go b/go/arrow/compute/exprs/exec_test.go index 038a57a8408f8..b74f80057a0d7 100644 --- a/go/arrow/compute/exprs/exec_test.go +++ b/go/arrow/compute/exprs/exec_test.go @@ -23,12 +23,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exprs" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exprs" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/substrait-io/substrait-go/expr" diff --git a/go/arrow/compute/exprs/extension_types.go b/go/arrow/compute/exprs/extension_types.go index 5dd5d229b09a7..8177675592fc9 100644 --- a/go/arrow/compute/exprs/extension_types.go +++ b/go/arrow/compute/exprs/extension_types.go @@ -24,8 +24,8 @@ import ( "reflect" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) type simpleExtensionTypeFactory[P comparable] struct { diff --git a/go/arrow/compute/exprs/field_refs.go b/go/arrow/compute/exprs/field_refs.go index 888b7e605f8b8..0e039d9e26601 100644 --- a/go/arrow/compute/exprs/field_refs.go +++ b/go/arrow/compute/exprs/field_refs.go @@ -21,11 +21,11 @@ package exprs import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/expr" ) diff --git a/go/arrow/compute/exprs/types.go b/go/arrow/compute/exprs/types.go index c231a62c28426..594a55c9041a8 100644 --- a/go/arrow/compute/exprs/types.go +++ b/go/arrow/compute/exprs/types.go @@ -24,8 +24,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/substrait-io/substrait-go/expr" "github.com/substrait-io/substrait-go/extensions" "github.com/substrait-io/substrait-go/types" diff 
--git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go index ea8579f9b60b0..d69c7d91044c7 100644 --- a/go/arrow/compute/fieldref.go +++ b/go/arrow/compute/fieldref.go @@ -25,8 +25,8 @@ import ( "strings" "unicode" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) var ( diff --git a/go/arrow/compute/fieldref_hash.go b/go/arrow/compute/fieldref_hash.go index 5da85254e314b..21ef88f1ecb4f 100644 --- a/go/arrow/compute/fieldref_hash.go +++ b/go/arrow/compute/fieldref_hash.go @@ -23,7 +23,7 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func (f FieldPath) hash(h *maphash.Hash) { diff --git a/go/arrow/compute/fieldref_test.go b/go/arrow/compute/fieldref_test.go index 45ca64acbcac2..ce2051f942271 100644 --- a/go/arrow/compute/fieldref_test.go +++ b/go/arrow/compute/fieldref_test.go @@ -19,10 +19,10 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/functions.go b/go/arrow/compute/functions.go index 44f54e09dee94..ebade11a8e60b 100644 --- a/go/arrow/compute/functions.go +++ b/go/arrow/compute/functions.go @@ -23,8 +23,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) type Function interface { diff --git a/go/arrow/compute/functions_test.go b/go/arrow/compute/functions_test.go index 0e40c0a2f8082..31a4cf124e845 100644 --- a/go/arrow/compute/functions_test.go +++ b/go/arrow/compute/functions_test.go @@ -21,8 +21,8 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic.go b/go/arrow/compute/internal/kernels/base_arithmetic.go index 38aa073c76745..169fbba2f02af 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic.go @@ -24,11 +24,11 @@ import ( "math/bits" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go index 902c5f341032b..b818e45dc1bb4 100644 --- 
a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go @@ -21,9 +21,9 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/constraints" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go index 223e6c29b755a..89384aa09fc73 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go index 0c81f426c537b..fff54292e3b4b 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go index 0291989d9a45b..36619106c93cf 100644 --- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go +++ b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go @@ -19,8 +19,8 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/boolean_cast.go b/go/arrow/compute/internal/kernels/boolean_cast.go index 266b569df7b7d..66a49f2be0294 100644 --- a/go/arrow/compute/internal/kernels/boolean_cast.go +++ b/go/arrow/compute/internal/kernels/boolean_cast.go @@ -22,9 +22,9 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) func isNonZero[T arrow.FixedWidthType](ctx *exec.KernelCtx, in []T, out []byte) error { diff --git a/go/arrow/compute/internal/kernels/cast.go b/go/arrow/compute/internal/kernels/cast.go index b7e3b59d7accc..8603d3ad1891f 100644 --- a/go/arrow/compute/internal/kernels/cast.go +++ b/go/arrow/compute/internal/kernels/cast.go @@ -19,9 +19,9 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" ) type CastOptions struct { diff --git a/go/arrow/compute/internal/kernels/cast_numeric.go b/go/arrow/compute/internal/kernels/cast_numeric.go index 88c295271fe5f..41ad94d83e68b 100644 --- 
a/go/arrow/compute/internal/kernels/cast_numeric.go +++ b/go/arrow/compute/internal/kernels/cast_numeric.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) var castNumericUnsafe func(itype, otype arrow.Type, in, out []byte, len int) = castNumericGo diff --git a/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go b/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go index 186fac16bae82..925b4328002d0 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go b/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go index 630bc06199f49..0b491244dcc44 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go b/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go index cea34a62a9489..4c19e06dc704e 100644 --- a/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/cast_temporal.go b/go/arrow/compute/internal/kernels/cast_temporal.go index 0ad73737439c7..183d47654ec64 100644 --- a/go/arrow/compute/internal/kernels/cast_temporal.go +++ b/go/arrow/compute/internal/kernels/cast_temporal.go @@ -24,10 +24,10 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const millisecondsInDay = 86400000 diff --git a/go/arrow/compute/internal/kernels/helpers.go b/go/arrow/compute/internal/kernels/helpers.go index a99670cbbb231..230a8e9112c29 100644 --- a/go/arrow/compute/internal/kernels/helpers.go +++ b/go/arrow/compute/internal/kernels/helpers.go @@ -22,13 +22,13 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/constraints" ) diff --git 
a/go/arrow/compute/internal/kernels/numeric_cast.go b/go/arrow/compute/internal/kernels/numeric_cast.go index 286c45e41d7b2..ca3a9937594aa 100644 --- a/go/arrow/compute/internal/kernels/numeric_cast.go +++ b/go/arrow/compute/internal/kernels/numeric_cast.go @@ -23,13 +23,13 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/rounding.go b/go/arrow/compute/internal/kernels/rounding.go index ab05082513679..1afe76065f4de 100644 --- a/go/arrow/compute/internal/kernels/rounding.go +++ b/go/arrow/compute/internal/kernels/rounding.go @@ -22,11 +22,11 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/scalar" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/scalar_arithmetic.go b/go/arrow/compute/internal/kernels/scalar_arithmetic.go index 038cca507b379..f2b52a6bf7101 100644 --- a/go/arrow/compute/internal/kernels/scalar_arithmetic.go +++ b/go/arrow/compute/internal/kernels/scalar_arithmetic.go @@ -22,13 +22,13 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" ) // scalar kernel that ignores (assumed all-null inputs) and returns null diff --git a/go/arrow/compute/internal/kernels/scalar_boolean.go b/go/arrow/compute/internal/kernels/scalar_boolean.go index 7dbf76d4e847f..f23a7f568b192 100644 --- a/go/arrow/compute/internal/kernels/scalar_boolean.go +++ b/go/arrow/compute/internal/kernels/scalar_boolean.go @@ -19,9 +19,9 @@ package kernels import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow/bitutil" + 
"github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/scalar" ) type computeWordFN func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go index 76de68a0ae44f..bf8594e8290f7 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "golang.org/x/sys/cpu" ) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go index b98b538570554..220e65cafe291 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index e897f767fd081..e7b4bce2362ba 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -18,7 +18,7 @@ package kernels -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go index 739ee929136ea..7eea2dd1abb73 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go @@ -21,7 +21,7 @@ package kernels import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:noescape diff --git a/go/arrow/compute/internal/kernels/scalar_comparisons.go b/go/arrow/compute/internal/kernels/scalar_comparisons.go index f7278d6b16e3a..dc43b74984a0d 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparisons.go +++ b/go/arrow/compute/internal/kernels/scalar_comparisons.go @@ -23,14 +23,14 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/bitutils" ) type binaryKernel func(left, right, out []byte, offset int) diff --git a/go/arrow/compute/internal/kernels/string_casts.go b/go/arrow/compute/internal/kernels/string_casts.go index 
ec64871d31263..6a50d6627140b 100644 --- a/go/arrow/compute/internal/kernels/string_casts.go +++ b/go/arrow/compute/internal/kernels/string_casts.go @@ -23,12 +23,12 @@ import ( "strconv" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/bitutils" ) func validateUtf8Fsb(input *exec.ArraySpan) error { diff --git a/go/arrow/compute/internal/kernels/types.go b/go/arrow/compute/internal/kernels/types.go index a3df0b2db18ff..fb20ed02381fe 100644 --- a/go/arrow/compute/internal/kernels/types.go +++ b/go/arrow/compute/internal/kernels/types.go @@ -21,10 +21,10 @@ package kernels import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/scalar" ) var ( diff --git a/go/arrow/compute/internal/kernels/vector_hash.go b/go/arrow/compute/internal/kernels/vector_hash.go index 6da52e075443f..57f925dc251b1 100644 --- a/go/arrow/compute/internal/kernels/vector_hash.go +++ b/go/arrow/compute/internal/kernels/vector_hash.go @@ -21,13 +21,13 @@ package kernels import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/hashing" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/hashing" ) type HashState interface { diff --git a/go/arrow/compute/internal/kernels/vector_run_end_encode.go b/go/arrow/compute/internal/kernels/vector_run_end_encode.go index f9f517585699a..08f8cf44b9206 100644 --- a/go/arrow/compute/internal/kernels/vector_run_end_encode.go +++ b/go/arrow/compute/internal/kernels/vector_run_end_encode.go @@ -24,14 +24,14 @@ import ( "sort" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/decimal128" + 
"github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) type RunEndEncodeState struct { diff --git a/go/arrow/compute/internal/kernels/vector_selection.go b/go/arrow/compute/internal/kernels/vector_selection.go index b4fd90848a229..e96782f4cd5ea 100644 --- a/go/arrow/compute/internal/kernels/vector_selection.go +++ b/go/arrow/compute/internal/kernels/vector_selection.go @@ -22,13 +22,13 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" ) type NullSelectionBehavior int8 diff --git a/go/arrow/compute/registry.go b/go/arrow/compute/registry.go index 8c42cc9724283..cb64c7e09de0a 100644 --- a/go/arrow/compute/registry.go +++ b/go/arrow/compute/registry.go @@ -21,7 +21,7 @@ package compute import ( "sync" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" "golang.org/x/exp/maps" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/registry_test.go b/go/arrow/compute/registry_test.go index 783f75e115333..15e561ada42d3 100644 --- a/go/arrow/compute/registry_test.go +++ b/go/arrow/compute/registry_test.go @@ -23,9 +23,9 @@ import ( "errors" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" "github.com/stretchr/testify/assert" "golang.org/x/exp/slices" ) diff --git a/go/arrow/compute/scalar_bool.go b/go/arrow/compute/scalar_bool.go index ed6007af9234b..39f4242163d49 100644 --- a/go/arrow/compute/scalar_bool.go +++ b/go/arrow/compute/scalar_bool.go @@ -21,9 +21,9 @@ package compute import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/scalar_bool_test.go b/go/arrow/compute/scalar_bool_test.go index 2ae7f2cba532b..4b2c5d54f8ae2 100644 --- a/go/arrow/compute/scalar_bool_test.go +++ b/go/arrow/compute/scalar_bool_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + 
"github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/compute/scalar_compare.go b/go/arrow/compute/scalar_compare.go index 7092431a01549..8f51440bc6362 100644 --- a/go/arrow/compute/scalar_compare.go +++ b/go/arrow/compute/scalar_compare.go @@ -21,9 +21,9 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) type compareFunction struct { diff --git a/go/arrow/compute/scalar_compare_test.go b/go/arrow/compute/scalar_compare_test.go index 9f1115312829f..1f1b65bd0f25f 100644 --- a/go/arrow/compute/scalar_compare_test.go +++ b/go/arrow/compute/scalar_compare_test.go @@ -24,15 +24,15 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/selection.go b/go/arrow/compute/selection.go index e2966189c9f6a..4aeaa8884649d 100644 --- a/go/arrow/compute/selection.go +++ b/go/arrow/compute/selection.go @@ -22,10 +22,10 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" "golang.org/x/sync/errgroup" ) diff --git a/go/arrow/compute/utils.go b/go/arrow/compute/utils.go index 899fe4cfbf4cc..909d0b68868f6 100644 --- a/go/arrow/compute/utils.go +++ b/go/arrow/compute/utils.go @@ -23,13 +23,13 @@ import ( "io" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + 
"github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" "golang.org/x/xerrors" ) diff --git a/go/arrow/compute/vector_hash.go b/go/arrow/compute/vector_hash.go index 7fcbce19ada4c..8692a6fff074c 100644 --- a/go/arrow/compute/vector_hash.go +++ b/go/arrow/compute/vector_hash.go @@ -21,8 +21,8 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index 87a4eb6806a9b..58ff1263ca880 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -23,12 +23,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/compute/vector_run_end_test.go b/go/arrow/compute/vector_run_end_test.go index a6cc1306a625b..8c8e776feb23c 100644 --- a/go/arrow/compute/vector_run_end_test.go +++ b/go/arrow/compute/vector_run_end_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/compute/vector_run_ends.go b/go/arrow/compute/vector_run_ends.go index 09b872f486a01..5dfdde4e00948 100644 --- a/go/arrow/compute/vector_run_ends.go +++ b/go/arrow/compute/vector_run_ends.go @@ -21,8 +21,8 @@ package compute import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" ) var ( diff --git a/go/arrow/compute/vector_selection_test.go b/go/arrow/compute/vector_selection_test.go index 2331a2d0342b5..6fcb5c242f151 100644 --- a/go/arrow/compute/vector_selection_test.go +++ b/go/arrow/compute/vector_selection_test.go @@ -24,15 +24,15 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - 
"github.com/apache/arrow/go/v17/arrow/compute/exec" - "github.com/apache/arrow/go/v17/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v17/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/compute/exec" + "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" + "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 06fed69a77fe5..ed254ae35b353 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -23,8 +23,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) var ( diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 46591a9a5adee..12d607b26c48d 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -29,13 +29,13 @@ import ( "time" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) // Reader wraps encoding/csv.Reader and creates array.Records from a schema. 
diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index 65453db015a7e..b0775b9b11a96 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -25,13 +25,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/csv" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/csv" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 237437c0441e1..f99d047e3c8cf 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -25,8 +25,8 @@ import ( "math/big" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string) string) []string { diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go index 243d83f7d847f..d0efbde170d65 100644 --- a/go/arrow/csv/writer.go +++ b/go/arrow/csv/writer.go @@ -22,7 +22,7 @@ import ( "strconv" "sync" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Writer wraps encoding/csv.Writer and writes arrow.Record based on a schema. diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go index ef9769fc32876..be9ab961c3ef7 100644 --- a/go/arrow/csv/writer_test.go +++ b/go/arrow/csv/writer_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/csv" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/csv" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" ) diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go index 8fff5e652572b..96b7bf65505ec 100644 --- a/go/arrow/datatype.go +++ b/go/arrow/datatype.go @@ -21,7 +21,7 @@ import ( "hash/maphash" "strings" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Type is a logical type. 
They can be expressed as diff --git a/go/arrow/datatype_binary_test.go b/go/arrow/datatype_binary_test.go index 41dee140c5a44..a65d92a0f61ac 100644 --- a/go/arrow/datatype_binary_test.go +++ b/go/arrow/datatype_binary_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestBinaryType(t *testing.T) { diff --git a/go/arrow/datatype_extension_test.go b/go/arrow/datatype_extension_test.go index 92fcacd620586..c3e595f523e57 100644 --- a/go/arrow/datatype_extension_test.go +++ b/go/arrow/datatype_extension_test.go @@ -20,8 +20,8 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index a1c2834b8d574..8bcae78d7d8af 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -22,7 +22,7 @@ import ( "sync" "time" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/internal/json" "golang.org/x/xerrors" ) diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index f4f3b071ff5d4..fbd1334626774 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -21,7 +21,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index b38983b7f2e5d..579b2c3306003 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -22,7 +22,7 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) type ( diff --git a/go/arrow/datatype_null_test.go b/go/arrow/datatype_null_test.go index b72395843ef8b..83b3f0c44c549 100644 --- a/go/arrow/datatype_null_test.go +++ b/go/arrow/datatype_null_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestNullType(t *testing.T) { diff --git a/go/arrow/datatype_viewheader.go b/go/arrow/datatype_viewheader.go index 8dd2fa4881e2f..e153251caaf03 100644 --- a/go/arrow/datatype_viewheader.go +++ b/go/arrow/datatype_viewheader.go @@ -20,9 +20,9 @@ import ( "bytes" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" ) const ( diff --git a/go/arrow/datatype_viewheader_inline.go b/go/arrow/datatype_viewheader_inline.go index 24f518d0e236a..2883ee380308e 100644 --- a/go/arrow/datatype_viewheader_inline.go +++ b/go/arrow/datatype_viewheader_inline.go @@ -21,7 +21,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/datatype_viewheader_inline_go1.19.go b/go/arrow/datatype_viewheader_inline_go1.19.go index 2f7299ec111b4..d72c0d6f17c2b 100644 --- 
a/go/arrow/datatype_viewheader_inline_go1.19.go +++ b/go/arrow/datatype_viewheader_inline_go1.19.go @@ -22,7 +22,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/datatype_viewheader_inline_tinygo.go b/go/arrow/datatype_viewheader_inline_tinygo.go index b309c07710e3e..a342167972fe4 100644 --- a/go/arrow/datatype_viewheader_inline_tinygo.go +++ b/go/arrow/datatype_viewheader_inline_tinygo.go @@ -22,7 +22,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) func (sh *ViewHeader) InlineString() (data string) { diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go index e4b19797d5462..00ab253003559 100644 --- a/go/arrow/decimal128/decimal128.go +++ b/go/arrow/decimal128/decimal128.go @@ -23,7 +23,7 @@ import ( "math/big" "math/bits" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go index 11e293ee3ebb0..18443512a36da 100644 --- a/go/arrow/decimal128/decimal128_test.go +++ b/go/arrow/decimal128/decimal128_test.go @@ -22,7 +22,7 @@ import ( "math/big" "testing" - "github.com/apache/arrow/go/v17/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal128" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go index d5abfc2298a40..8244d2cd8334c 100644 --- a/go/arrow/decimal256/decimal256.go +++ b/go/arrow/decimal256/decimal256.go @@ -23,8 +23,8 @@ import ( "math/big" "math/bits" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) const ( diff --git a/go/arrow/decimal256/decimal256_test.go b/go/arrow/decimal256/decimal256_test.go index 07c147bacf2f1..3d2ee8c543597 100644 --- a/go/arrow/decimal256/decimal256_test.go +++ b/go/arrow/decimal256/decimal256_test.go @@ -23,7 +23,7 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/decimal256" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/doc.go b/go/arrow/doc.go index 93e838d20e4d0..30e6b7eb8d6ea 100644 --- a/go/arrow/doc.go +++ b/go/arrow/doc.go @@ -34,7 +34,7 @@ To build with tinygo include the noasm build tag. 
*/ package arrow -const PkgVersion = "17.0.0-SNAPSHOT" +const PkgVersion = "18.0.0-SNAPSHOT" //go:generate go run _tools/tmpl/main.go -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl //go:generate go run _tools/tmpl/main.go -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl tensor/numeric.gen.go.tmpl tensor/numeric.gen_test.go.tmpl diff --git a/go/arrow/encoded/ree_utils.go b/go/arrow/encoded/ree_utils.go index 0493d71f31f5c..822edd0303703 100644 --- a/go/arrow/encoded/ree_utils.go +++ b/go/arrow/encoded/ree_utils.go @@ -20,7 +20,7 @@ import ( "math" "sort" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // FindPhysicalIndex performs a binary search on the run-ends to return diff --git a/go/arrow/encoded/ree_utils_test.go b/go/arrow/encoded/ree_utils_test.go index 99b8252d1d7c2..43a4f83b3b999 100644 --- a/go/arrow/encoded/ree_utils_test.go +++ b/go/arrow/encoded/ree_utils_test.go @@ -21,10 +21,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/endian/endian.go b/go/arrow/endian/endian.go index 234c58cde1d14..8ac496b996938 100644 --- a/go/arrow/endian/endian.go +++ b/go/arrow/endian/endian.go @@ -17,8 +17,8 @@ package endian import ( - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" ) type Endianness flatbuf.Endianness diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go index fbb092ef5baeb..3dac447ae7c83 100644 --- a/go/arrow/example_test.go +++ b/go/arrow/example_test.go @@ -20,10 +20,10 @@ import ( "fmt" "log" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) // This example demonstrates how to build an array of int64 values using a builder and Append. 
diff --git a/go/arrow/flight/basic_auth_flight_test.go b/go/arrow/flight/basic_auth_flight_test.go index c602521714727..dac10e2657085 100755 --- a/go/arrow/flight/basic_auth_flight_test.go +++ b/go/arrow/flight/basic_auth_flight_test.go @@ -22,7 +22,7 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/client.go b/go/arrow/flight/client.go index b049072e19199..13c068e159f2b 100644 --- a/go/arrow/flight/client.go +++ b/go/arrow/flight/client.go @@ -26,7 +26,7 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go index 53e60fb70b7ff..2e2d02bc21fb5 100644 --- a/go/arrow/flight/cookie_middleware_test.go +++ b/go/arrow/flight/cookie_middleware_test.go @@ -28,7 +28,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" diff --git a/go/arrow/flight/example_flight_server_test.go b/go/arrow/flight/example_flight_server_test.go index e5e684d951541..9dd7bc8efef48 100755 --- a/go/arrow/flight/example_flight_server_test.go +++ b/go/arrow/flight/example_flight_server_test.go @@ -23,7 +23,7 @@ import ( "io" "log" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flight_middleware_test.go b/go/arrow/flight/flight_middleware_test.go index 980987d5d4d0f..181b41ea41f2a 100755 --- a/go/arrow/flight/flight_middleware_test.go +++ b/go/arrow/flight/flight_middleware_test.go @@ -23,8 +23,8 @@ import ( sync "sync" "testing" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go index 47c8183b3883d..fe896f39a2b21 100755 --- a/go/arrow/flight/flight_test.go +++ b/go/arrow/flight/flight_test.go @@ -23,11 +23,11 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index c6794820dc172..4a600e5253e9b 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -22,12 +22,12 @@ import ( "fmt" 
"io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index 33da79167c4ae..7604b554cbc6c 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -22,12 +22,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" "google.golang.org/grpc" diff --git a/go/arrow/flight/flightsql/column_metadata.go b/go/arrow/flight/flightsql/column_metadata.go index ca9c633ab1e0f..60e48b5e0f5d4 100644 --- a/go/arrow/flight/flightsql/column_metadata.go +++ b/go/arrow/flight/flightsql/column_metadata.go @@ -19,7 +19,7 @@ package flightsql import ( "strconv" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) const ( diff --git a/go/arrow/flight/flightsql/driver/README.md b/go/arrow/flight/flightsql/driver/README.md index 5187c7e4248ec..802d050042c66 100644 --- a/go/arrow/flight/flightsql/driver/README.md +++ b/go/arrow/flight/flightsql/driver/README.md @@ -36,7 +36,7 @@ connection pooling, transactions combined with ease of use (see (#usage)). 
## Prerequisites * Go 1.17+ -* Installation via `go get -u github.com/apache/arrow/go/v17/arrow/flight/flightsql` +* Installation via `go get -u github.com/apache/arrow/go/v18/arrow/flight/flightsql` * Backend speaking FlightSQL --------------------------------------- @@ -55,7 +55,7 @@ import ( "database/sql" "time" - _ "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + _ "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) // Open the connection to an SQLite backend @@ -141,7 +141,7 @@ import ( "log" "time" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) func main() { diff --git a/go/arrow/flight/flightsql/driver/config_test.go b/go/arrow/flight/flightsql/driver/config_test.go index 6984da68494be..d74f9d84d0f1e 100644 --- a/go/arrow/flight/flightsql/driver/config_test.go +++ b/go/arrow/flight/flightsql/driver/config_test.go @@ -22,7 +22,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/driver" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/driver" ) func TestConfigTLSRegistry(t *testing.T) { diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index cd0487e7ad275..0f2b02deaca7c 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -26,11 +26,11 @@ import ( "sync" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc" "google.golang.org/grpc/credentials" diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go index d4361eb2dd722..e5060ccbe33d0 100644 --- a/go/arrow/flight/flightsql/driver/driver_test.go +++ b/go/arrow/flight/flightsql/driver/driver_test.go @@ -34,13 +34,13 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/driver" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/driver" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/memory" ) const defaultTableName = "drivertest" diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index d07ed446b2bcb..a206d7753529d 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -21,8 +21,8 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" ) // *** GRPC helpers *** diff --git a/go/arrow/flight/flightsql/driver/utils_test.go b/go/arrow/flight/flightsql/driver/utils_test.go index 2c87075423e0a..0f6033b9282ea 100644 --- a/go/arrow/flight/flightsql/driver/utils_test.go +++ b/go/arrow/flight/flightsql/driver/utils_test.go @@ -22,12 +22,12 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go b/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go index 6bce68d7949dc..529feeb04c88c 100644 --- a/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go +++ b/go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go @@ -27,9 +27,9 @@ import ( "os" "strconv" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" ) func main() { diff --git a/go/arrow/flight/flightsql/example/sql_batch_reader.go b/go/arrow/flight/flightsql/example/sql_batch_reader.go index cb806ef1b27ab..bfd3e354b17e9 100644 --- a/go/arrow/flight/flightsql/example/sql_batch_reader.go +++ b/go/arrow/flight/flightsql/example/sql_batch_reader.go @@ -26,11 +26,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/wrapperspb" diff --git a/go/arrow/flight/flightsql/example/sqlite_info.go b/go/arrow/flight/flightsql/example/sqlite_info.go index ea9d7b98aade4..6135911c7b908 100644 --- a/go/arrow/flight/flightsql/example/sqlite_info.go +++ b/go/arrow/flight/flightsql/example/sqlite_info.go @@ -20,8 +20,8 @@ package example import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" ) func SqlInfoResultMap() flightsql.SqlInfoResultMap { diff --git a/go/arrow/flight/flightsql/example/sqlite_server.go b/go/arrow/flight/flightsql/example/sqlite_server.go index b5a64d57dceb6..6a2b80e0dbc36 100644 --- a/go/arrow/flight/flightsql/example/sqlite_server.go +++ b/go/arrow/flight/flightsql/example/sqlite_server.go @@ -45,13 +45,13 @@ 
import ( "strings" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" diff --git a/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go index 4786110e232a8..373be784b9145 100644 --- a/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go +++ b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go @@ -25,12 +25,12 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" sqlite3 "modernc.org/sqlite/lib" ) diff --git a/go/arrow/flight/flightsql/example/type_info.go b/go/arrow/flight/flightsql/example/type_info.go index cfe47b4090fe7..5e5e52cf2a4cf 100644 --- a/go/arrow/flight/flightsql/example/type_info.go +++ b/go/arrow/flight/flightsql/example/type_info.go @@ -22,10 +22,10 @@ package example import ( "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" ) func GetTypeInfoResult(mem memory.Allocator) arrow.Record { diff --git a/go/arrow/flight/flightsql/schema_ref/reference_schemas.go b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go index 5b2684bae1694..d9ba7767feb83 100644 --- a/go/arrow/flight/flightsql/schema_ref/reference_schemas.go +++ b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go @@ -18,7 +18,7 @@ // by FlightSQL servers and clients. 
package schema_ref -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" var ( Catalogs = arrow.NewSchema( diff --git a/go/arrow/flight/flightsql/server.go b/go/arrow/flight/flightsql/server.go index 25bdc5f4d5bb3..b085619745c90 100644 --- a/go/arrow/flight/flightsql/server.go +++ b/go/arrow/flight/flightsql/server.go @@ -20,14 +20,14 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" diff --git a/go/arrow/flight/flightsql/server_test.go b/go/arrow/flight/flightsql/server_test.go index e594f8e773fd2..494dda1703fc4 100644 --- a/go/arrow/flight/flightsql/server_test.go +++ b/go/arrow/flight/flightsql/server_test.go @@ -22,13 +22,13 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" - "github.com/apache/arrow/go/v17/arrow/flight/session" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "google.golang.org/grpc" diff --git a/go/arrow/flight/flightsql/sql_info.go b/go/arrow/flight/flightsql/sql_info.go index 662d809955522..2cd7f91cfd70a 100644 --- a/go/arrow/flight/flightsql/sql_info.go +++ b/go/arrow/flight/flightsql/sql_info.go @@ -17,8 +17,8 @@ package flightsql import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) const ( diff --git a/go/arrow/flight/flightsql/sqlite_server_test.go b/go/arrow/flight/flightsql/sqlite_server_test.go index eb30b0aa50695..fee2475b2b2ec 100644 --- a/go/arrow/flight/flightsql/sqlite_server_test.go +++ b/go/arrow/flight/flightsql/sqlite_server_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/example" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - 
"github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "google.golang.org/grpc/codes" diff --git a/go/arrow/flight/flightsql/types.go b/go/arrow/flight/flightsql/types.go index c709a865da7bb..88840cd7d6caf 100644 --- a/go/arrow/flight/flightsql/types.go +++ b/go/arrow/flight/flightsql/types.go @@ -17,7 +17,7 @@ package flightsql import ( - pb "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + pb "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go index 8332fedd2dc0a..9067e9e2982e5 100644 --- a/go/arrow/flight/record_batch_reader.go +++ b/go/arrow/flight/record_batch_reader.go @@ -21,13 +21,13 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // DataStreamReader is an interface for receiving flight data messages on a stream diff --git a/go/arrow/flight/record_batch_writer.go b/go/arrow/flight/record_batch_writer.go index ca0354e00bc99..27211277ab061 100644 --- a/go/arrow/flight/record_batch_writer.go +++ b/go/arrow/flight/record_batch_writer.go @@ -19,9 +19,9 @@ package flight import ( "bytes" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) // DataStreamWriter is an interface that represents an Arrow Flight stream diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index fc74ba7aa4d6e..b67e52f4357ce 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -23,7 +23,7 @@ import ( "os" "os/signal" - "github.com/apache/arrow/go/v17/arrow/flight/gen/flight" + "github.com/apache/arrow/go/v18/arrow/flight/gen/flight" "google.golang.org/grpc" ) diff --git a/go/arrow/flight/server_example_test.go b/go/arrow/flight/server_example_test.go index 8386147c311aa..60e5ec8f4e381 100644 --- a/go/arrow/flight/server_example_test.go +++ b/go/arrow/flight/server_example_test.go @@ -21,7 +21,7 @@ import ( "fmt" "net" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" 
"google.golang.org/grpc/health" diff --git a/go/arrow/flight/session/example_session_test.go b/go/arrow/flight/session/example_session_test.go index 7d92f27f2c840..5a9e7e83d21e0 100644 --- a/go/arrow/flight/session/example_session_test.go +++ b/go/arrow/flight/session/example_session_test.go @@ -19,9 +19,9 @@ package session_test import ( "log" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/session" "github.com/google/uuid" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/session/session.go b/go/arrow/flight/session/session.go index f09a2ed620d8f..508f79befc258 100644 --- a/go/arrow/flight/session/session.go +++ b/go/arrow/flight/session/session.go @@ -36,7 +36,7 @@ import ( "net/http" "sync" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/metadata" "google.golang.org/protobuf/proto" diff --git a/go/arrow/flight/session/stateful_session.go b/go/arrow/flight/session/stateful_session.go index 1a339c1bc4eb7..0fb1c8f729756 100644 --- a/go/arrow/flight/session/stateful_session.go +++ b/go/arrow/flight/session/stateful_session.go @@ -22,7 +22,7 @@ import ( "net/http" "sync" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "github.com/google/uuid" ) diff --git a/go/arrow/flight/session/stateless_session.go b/go/arrow/flight/session/stateless_session.go index 1e248a7a5e92d..d792a91f84ece 100644 --- a/go/arrow/flight/session/stateless_session.go +++ b/go/arrow/flight/session/stateless_session.go @@ -22,7 +22,7 @@ import ( "fmt" "net/http" - "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight" "google.golang.org/protobuf/proto" ) diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go index 9a049c762a364..5111f2dbc4da6 100644 --- a/go/arrow/internal/arrdata/arrdata.go +++ b/go/arrow/internal/arrdata/arrdata.go @@ -21,14 +21,14 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" ) var ( diff --git a/go/arrow/internal/arrdata/ioutil.go b/go/arrow/internal/arrdata/ioutil.go index 63fac86fde948..715451ad9634d 100644 --- a/go/arrow/internal/arrdata/ioutil.go +++ b/go/arrow/internal/arrdata/ioutil.go @@ -23,11 +23,11 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/ipc" - 
"github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) // CheckArrowFile checks whether a given ARROW file contains the expected list of records. diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 3f41f1b40a38e..42bbee8d5a2ec 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -26,16 +26,16 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" ) type Schema struct { diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 08a879e7ea3e3..7459ef8ae8f1d 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -22,9 +22,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/internal/arrjson/option.go b/go/arrow/internal/arrjson/option.go index ec53f1e8f43b9..261bc75b64e6f 100644 --- a/go/arrow/internal/arrjson/option.go +++ b/go/arrow/internal/arrjson/option.go @@ -17,8 +17,8 @@ package arrjson import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) type config struct { diff --git a/go/arrow/internal/arrjson/reader.go b/go/arrow/internal/arrjson/reader.go index 8be44c001e180..97fe2904cbe5f 100644 --- a/go/arrow/internal/arrjson/reader.go +++ b/go/arrow/internal/arrjson/reader.go @@ -20,11 +20,11 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/internal/json" ) type Reader struct { diff --git 
a/go/arrow/internal/arrjson/writer.go b/go/arrow/internal/arrjson/writer.go index 1d1565885ba6b..25004863abe0d 100644 --- a/go/arrow/internal/arrjson/writer.go +++ b/go/arrow/internal/arrjson/writer.go @@ -20,11 +20,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/internal/json" ) const ( diff --git a/go/arrow/internal/cdata_integration/entrypoints.go b/go/arrow/internal/cdata_integration/entrypoints.go index 59e1db52b50e0..06f7cc8a41019 100644 --- a/go/arrow/internal/cdata_integration/entrypoints.go +++ b/go/arrow/internal/cdata_integration/entrypoints.go @@ -25,11 +25,11 @@ import ( "runtime" "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/cdata" - "github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/cdata" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // #include diff --git a/go/arrow/internal/dictutils/dict.go b/go/arrow/internal/dictutils/dict.go index 76382a3dbcac2..da18c2d0e3fae 100644 --- a/go/arrow/internal/dictutils/dict.go +++ b/go/arrow/internal/dictutils/dict.go @@ -21,9 +21,9 @@ import ( "fmt" "hash/maphash" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) type Kind int8 diff --git a/go/arrow/internal/dictutils/dict_test.go b/go/arrow/internal/dictutils/dict_test.go index 393ad5d9e2de7..7a68ae3073ddb 100644 --- a/go/arrow/internal/dictutils/dict_test.go +++ b/go/arrow/internal/dictutils/dict_test.go @@ -20,10 +20,10 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestDictMemo(t *testing.T) { diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go index 8d7a3617f2ada..105491d38e936 100755 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go @@ -22,7 +22,7 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v17/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v18/arrow/internal/flight_integration" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) diff --git 
a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go index 2ae02789b79e7..5de4076b708b2 100644 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go @@ -23,7 +23,7 @@ import ( "os" "syscall" - "github.com/apache/arrow/go/v17/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v18/arrow/internal/flight_integration" ) var ( diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index ccfc7a0ed45a3..1528bb05d9daa 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -31,16 +31,16 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql" - "github.com/apache/arrow/go/v17/arrow/flight/flightsql/schema_ref" - "github.com/apache/arrow/go/v17/arrow/flight/session" - "github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql" + "github.com/apache/arrow/go/v18/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v18/arrow/flight/session" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "golang.org/x/xerrors" "google.golang.org/grpc" "google.golang.org/grpc/codes" diff --git a/go/arrow/internal/testing/gen/random_array_gen.go b/go/arrow/internal/testing/gen/random_array_gen.go index 05a8749d15dce..b4623bc4c3596 100644 --- a/go/arrow/internal/testing/gen/random_array_gen.go +++ b/go/arrow/internal/testing/gen/random_array_gen.go @@ -19,11 +19,11 @@ package gen import ( "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) diff --git a/go/arrow/internal/testing/tools/bits_test.go b/go/arrow/internal/testing/tools/bits_test.go index e9a638a6b9b2a..6897485e4c702 100644 --- a/go/arrow/internal/testing/tools/bits_test.go +++ b/go/arrow/internal/testing/tools/bits_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/internal/testing/tools/data_types.go b/go/arrow/internal/testing/tools/data_types.go index bbfa1af0f6703..545e3f8bc3a2c 100644 --- a/go/arrow/internal/testing/tools/data_types.go +++ 
b/go/arrow/internal/testing/tools/data_types.go @@ -21,8 +21,8 @@ package tools import ( "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 2f1f2346f1d3b..bafd0cf0f6926 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -17,8 +17,8 @@ package internal import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" ) const CurMetadataVersion = flatbuf.MetadataVersionV5 diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index 4faaabb05ddc1..3e8d47a86c249 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -63,8 +63,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-cat/main_test.go b/go/arrow/ipc/cmd/arrow-cat/main_test.go index 53b89fadaf89f..904480ed374d4 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main_test.go +++ b/go/arrow/ipc/cmd/arrow-cat/main_test.go @@ -23,10 +23,10 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestCatStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go index 514f8d42ce3a4..e8cdcd66ea3cb 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go index f671ff1d0987c..e31430f5fa392 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFileToStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go index 31669f6a0f00e..b3e1dcac14119 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go @@ -22,12 +22,12 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - 
"github.com/apache/arrow/go/v17/arrow/internal/arrjson" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/arrjson" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/internal/types" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go index 1aaf1430b4ff8..44e6aeb472f32 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go @@ -20,8 +20,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestIntegration(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 2f54744c4068d..f461131786d02 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -61,8 +61,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-ls/main_test.go b/go/arrow/ipc/cmd/arrow-ls/main_test.go index 2c123c064a638..6ec8ef862bc14 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main_test.go +++ b/go/arrow/ipc/cmd/arrow-ls/main_test.go @@ -23,10 +23,10 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestLsStream(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go index 3ea635ec03a2a..5c9b768995ec9 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go index 2ae0c7c64624f..13c566f3593cd 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestStreamToFile(t *testing.T) { diff --git a/go/arrow/ipc/compression.go b/go/arrow/ipc/compression.go index 12bb5d402d5e6..501d9b7c38159 100644 --- a/go/arrow/ipc/compression.go +++ b/go/arrow/ipc/compression.go @@ -19,9 +19,9 @@ package ipc import 
( "io" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/klauspost/compress/zstd" "github.com/pierrec/lz4/v4" ) diff --git a/go/arrow/ipc/endian_swap.go b/go/arrow/ipc/endian_swap.go index af4dead45a6df..f10adf5c13e7d 100644 --- a/go/arrow/ipc/endian_swap.go +++ b/go/arrow/ipc/endian_swap.go @@ -21,9 +21,9 @@ import ( "fmt" "math/bits" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) // swap the endianness of the array's buffers as needed in-place to save diff --git a/go/arrow/ipc/endian_swap_test.go b/go/arrow/ipc/endian_swap_test.go index 102ae4364345f..498b7decad146 100644 --- a/go/arrow/ipc/endian_swap_test.go +++ b/go/arrow/ipc/endian_swap_test.go @@ -20,11 +20,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index 031a028a558be..947bbd474328f 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -23,14 +23,14 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // FileReader is an Arrow file reader. 
diff --git a/go/arrow/ipc/file_test.go b/go/arrow/ipc/file_test.go index a24f61e8c29c9..dea63579cfea6 100644 --- a/go/arrow/ipc/file_test.go +++ b/go/arrow/ipc/file_test.go @@ -21,9 +21,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestFile(t *testing.T) { diff --git a/go/arrow/ipc/file_writer.go b/go/arrow/ipc/file_writer.go index 8cea458192b85..8582c81baf2fe 100644 --- a/go/arrow/ipc/file_writer.go +++ b/go/arrow/ipc/file_writer.go @@ -21,11 +21,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // PayloadWriter is an interface for injecting a different payloadwriter diff --git a/go/arrow/ipc/ipc.go b/go/arrow/ipc/ipc.go index b31a358a8a8e0..76d12f061efa5 100644 --- a/go/arrow/ipc/ipc.go +++ b/go/arrow/ipc/ipc.go @@ -19,10 +19,10 @@ package ipc import ( "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) const ( diff --git a/go/arrow/ipc/ipc_test.go b/go/arrow/ipc/ipc_test.go index 17499c45b2059..7df9bc8c28bb0 100644 --- a/go/arrow/ipc/ipc_test.go +++ b/go/arrow/ipc/ipc_test.go @@ -29,11 +29,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestArrow12072(t *testing.T) { diff --git a/go/arrow/ipc/message.go b/go/arrow/ipc/message.go index 23142d04c8229..897f031791b2b 100644 --- a/go/arrow/ipc/message.go +++ b/go/arrow/ipc/message.go @@ -22,9 +22,9 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) // MetadataVersion represents the Arrow metadata version. 
diff --git a/go/arrow/ipc/message_test.go b/go/arrow/ipc/message_test.go index f81b0cb2845f7..e5760c6f70719 100644 --- a/go/arrow/ipc/message_test.go +++ b/go/arrow/ipc/message_test.go @@ -22,9 +22,9 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestMessageReaderBodyInAllocator(t *testing.T) { diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index 604adab322959..ba90c993885d6 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -23,11 +23,11 @@ import ( "io" "sort" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" flatbuffers "github.com/google/flatbuffers/go" ) diff --git a/go/arrow/ipc/metadata_test.go b/go/arrow/ipc/metadata_test.go index f24ac204129e2..33bc63c2a0068 100644 --- a/go/arrow/ipc/metadata_test.go +++ b/go/arrow/ipc/metadata_test.go @@ -21,12 +21,12 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" flatbuffers "github.com/google/flatbuffers/go" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/ipc/reader.go b/go/arrow/ipc/reader.go index aeb47cfbd25c5..147b22213debf 100644 --- a/go/arrow/ipc/reader.go +++ b/go/arrow/ipc/reader.go @@ -23,14 +23,14 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // Reader reads records from an io.Reader. 
diff --git a/go/arrow/ipc/reader_test.go b/go/arrow/ipc/reader_test.go index 556d372881ec0..44aee19ecadd6 100644 --- a/go/arrow/ipc/reader_test.go +++ b/go/arrow/ipc/reader_test.go @@ -22,9 +22,9 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/stream_test.go b/go/arrow/ipc/stream_test.go index 14937a3caca97..b044acff5350d 100644 --- a/go/arrow/ipc/stream_test.go +++ b/go/arrow/ipc/stream_test.go @@ -22,9 +22,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/internal/arrdata" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/internal/arrdata" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" ) func TestStream(t *testing.T) { diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 63c0b612670b0..ca4f77d35e17f 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -26,15 +26,15 @@ import ( "sync" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/internal/dictutils" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/dictutils" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) type swriter struct { diff --git a/go/arrow/ipc/writer_test.go b/go/arrow/ipc/writer_test.go index 4e519ed293422..e5683243e4546 100644 --- a/go/arrow/ipc/writer_test.go +++ b/go/arrow/ipc/writer_test.go @@ -24,11 +24,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/math/float64.go b/go/arrow/math/float64.go index 4f8fca9e0e959..b5429e50aec09 100644 --- a/go/arrow/math/float64.go +++ b/go/arrow/math/float64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Float64Funcs struct { diff --git a/go/arrow/math/float64_avx2_amd64.go b/go/arrow/math/float64_avx2_amd64.go index 
73f0126e30f67..8f11b1f2481a3 100644 --- a/go/arrow/math/float64_avx2_amd64.go +++ b/go/arrow/math/float64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_neon_arm64.go b/go/arrow/math/float64_neon_arm64.go index 77f97f5e68bd8..c41801714ea20 100755 --- a/go/arrow/math/float64_neon_arm64.go +++ b/go/arrow/math/float64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_sse4_amd64.go b/go/arrow/math/float64_sse4_amd64.go index ea1a1a009011f..bdd17559edfaf 100644 --- a/go/arrow/math/float64_sse4_amd64.go +++ b/go/arrow/math/float64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_test.go b/go/arrow/math/float64_test.go index 637db6e42b556..de1a1ef1ec3be 100644 --- a/go/arrow/math/float64_test.go +++ b/go/arrow/math/float64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/int64.go b/go/arrow/math/int64.go index 457e9d37465d9..a7d2b76b69704 100644 --- a/go/arrow/math/int64.go +++ b/go/arrow/math/int64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Int64Funcs struct { diff --git a/go/arrow/math/int64_avx2_amd64.go b/go/arrow/math/int64_avx2_amd64.go index 791436adb0a15..353338d43282a 100644 --- a/go/arrow/math/int64_avx2_amd64.go +++ b/go/arrow/math/int64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_neon_arm64.go b/go/arrow/math/int64_neon_arm64.go index 6439e00e9f999..29c5a8eed6c89 100755 --- a/go/arrow/math/int64_neon_arm64.go +++ b/go/arrow/math/int64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_sse4_amd64.go b/go/arrow/math/int64_sse4_amd64.go index dca70c838baf2..cf443c5f1a7ec 100644 --- a/go/arrow/math/int64_sse4_amd64.go +++ b/go/arrow/math/int64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_test.go b/go/arrow/math/int64_test.go index 4e6f808db0516..0719d6955b367 100644 --- a/go/arrow/math/int64_test.go +++ b/go/arrow/math/int64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" 
"github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/type.go.tmpl b/go/arrow/math/type.go.tmpl index a4e25ae574548..28becffdb3842 100644 --- a/go/arrow/math/type.go.tmpl +++ b/go/arrow/math/type.go.tmpl @@ -17,7 +17,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$def := .D}} diff --git a/go/arrow/math/type_simd_amd64.go.tmpl b/go/arrow/math/type_simd_amd64.go.tmpl index 86b31e31152bc..cb11dc0ff808c 100644 --- a/go/arrow/math/type_simd_amd64.go.tmpl +++ b/go/arrow/math/type_simd_amd64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_simd_arm64.go.tmpl b/go/arrow/math/type_simd_arm64.go.tmpl index 86b31e31152bc..cb11dc0ff808c 100755 --- a/go/arrow/math/type_simd_arm64.go.tmpl +++ b/go/arrow/math/type_simd_arm64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_test.go.tmpl b/go/arrow/math/type_test.go.tmpl index 4b11c043155a6..cc3d39a4a1ad6 100644 --- a/go/arrow/math/type_test.go.tmpl +++ b/go/arrow/math/type_test.go.tmpl @@ -19,9 +19,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/uint64.go b/go/arrow/math/uint64.go index 3b752caecdaf9..2a24886ee5510 100644 --- a/go/arrow/math/uint64.go +++ b/go/arrow/math/uint64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) type Uint64Funcs struct { diff --git a/go/arrow/math/uint64_avx2_amd64.go b/go/arrow/math/uint64_avx2_amd64.go index a4092e2140b1e..ba6ea29b92201 100644 --- a/go/arrow/math/uint64_avx2_amd64.go +++ b/go/arrow/math/uint64_avx2_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_neon_arm64.go b/go/arrow/math/uint64_neon_arm64.go index 574bbe4064f2a..b83ca85e55701 100755 --- a/go/arrow/math/uint64_neon_arm64.go +++ b/go/arrow/math/uint64_neon_arm64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_sse4_amd64.go b/go/arrow/math/uint64_sse4_amd64.go index fff3f3ffeeb31..a91ff4e5100d1 100644 --- a/go/arrow/math/uint64_sse4_amd64.go +++ b/go/arrow/math/uint64_sse4_amd64.go @@ -24,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_test.go b/go/arrow/math/uint64_test.go index 3403bf5039159..20418557b2101 100644 --- a/go/arrow/math/uint64_test.go +++ b/go/arrow/math/uint64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/math" - 
"github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/math" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/buffer.go b/go/arrow/memory/buffer.go index 55a17afcf7401..586ff387f83de 100644 --- a/go/arrow/memory/buffer.go +++ b/go/arrow/memory/buffer.go @@ -19,7 +19,7 @@ package memory import ( "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Buffer is a wrapper type for a buffer of bytes. diff --git a/go/arrow/memory/buffer_test.go b/go/arrow/memory/buffer_test.go index d61b4e0a3bb4f..ddc5871c85379 100644 --- a/go/arrow/memory/buffer_test.go +++ b/go/arrow/memory/buffer_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index 5eb66ade9d861..c3cb0df47523d 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -22,7 +22,7 @@ package memory import ( "runtime" - cga "github.com/apache/arrow/go/v17/arrow/memory/internal/cgoalloc" + cga "github.com/apache/arrow/go/v18/arrow/memory/internal/cgoalloc" ) // CgoArrowAllocator is an allocator which exposes the C++ memory pool class diff --git a/go/arrow/memory/default_mallocator.go b/go/arrow/memory/default_mallocator.go index b30e7e75d8080..c1a4ed095fadf 100644 --- a/go/arrow/memory/default_mallocator.go +++ b/go/arrow/memory/default_mallocator.go @@ -19,7 +19,7 @@ package memory import ( - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" ) // DefaultAllocator is a default implementation of Allocator and can be used anywhere diff --git a/go/arrow/memory/default_mallocator_test.go b/go/arrow/memory/default_mallocator_test.go index 8737a5224e0f4..7667de1111a31 100644 --- a/go/arrow/memory/default_mallocator_test.go +++ b/go/arrow/memory/default_mallocator_test.go @@ -21,8 +21,8 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/mallocator/mallocator_test.go b/go/arrow/memory/mallocator/mallocator_test.go index 4070cc32b2735..5e1482b73697f 100644 --- a/go/arrow/memory/mallocator/mallocator_test.go +++ b/go/arrow/memory/mallocator/mallocator_test.go @@ -23,7 +23,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/memory/mallocator" + "github.com/apache/arrow/go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/memory_test.go b/go/arrow/memory/memory_test.go index 35bd28e53c795..4a823494ff99b 100644 --- a/go/arrow/memory/memory_test.go +++ b/go/arrow/memory/memory_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/record.go b/go/arrow/record.go index a2234d03a9f5b..b812fcd481a60 100644 --- a/go/arrow/record.go +++ b/go/arrow/record.go @@ -16,7 +16,7 @@ package arrow -import 
"github.com/apache/arrow/go/v17/internal/json" +import "github.com/apache/arrow/go/v18/internal/json" // Record is a collection of equal-length arrays matching a particular Schema. // Also known as a RecordBatch in the spec and in some implementations. diff --git a/go/arrow/scalar/append.go b/go/arrow/scalar/append.go index 9520514743443..3a5823775457d 100644 --- a/go/arrow/scalar/append.go +++ b/go/arrow/scalar/append.go @@ -21,11 +21,11 @@ package scalar import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/scalar/append_test.go b/go/arrow/scalar/append_test.go index bbfa5d289cf78..a8eca4ee455b4 100644 --- a/go/arrow/scalar/append_test.go +++ b/go/arrow/scalar/append_test.go @@ -23,11 +23,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/scalar/binary.go b/go/arrow/scalar/binary.go index 6e1fadb7a1256..26c153dc7a46d 100644 --- a/go/arrow/scalar/binary.go +++ b/go/arrow/scalar/binary.go @@ -21,8 +21,8 @@ import ( "fmt" "unicode/utf8" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" ) type BinaryScalar interface { diff --git a/go/arrow/scalar/compare.go b/go/arrow/scalar/compare.go index f54ff7c82c84a..d32a685b819c6 100644 --- a/go/arrow/scalar/compare.go +++ b/go/arrow/scalar/compare.go @@ -16,7 +16,7 @@ package scalar -import "github.com/apache/arrow/go/v17/arrow" +import "github.com/apache/arrow/go/v18/arrow" // Equals returns true if two scalars are equal, which means they have the same // datatype, validity and value. 
diff --git a/go/arrow/scalar/nested.go b/go/arrow/scalar/nested.go index 484dad795e791..5f3447e686873 100644 --- a/go/arrow/scalar/nested.go +++ b/go/arrow/scalar/nested.go @@ -21,10 +21,10 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/scalar/numeric.gen.go b/go/arrow/scalar/numeric.gen.go index 7287d4ff00932..25bdc242ae8c9 100644 --- a/go/arrow/scalar/numeric.gen.go +++ b/go/arrow/scalar/numeric.gen.go @@ -24,9 +24,9 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" ) type Int8 struct { diff --git a/go/arrow/scalar/numeric.gen_test.go b/go/arrow/scalar/numeric.gen_test.go index 071af7cfc1524..9349022b65591 100644 --- a/go/arrow/scalar/numeric.gen_test.go +++ b/go/arrow/scalar/numeric.gen_test.go @@ -21,8 +21,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/numeric.gen_test.go.tmpl b/go/arrow/scalar/numeric.gen_test.go.tmpl index e21b4f20eeb7c..f5de3f9dcbfa3 100644 --- a/go/arrow/scalar/numeric.gen_test.go.tmpl +++ b/go/arrow/scalar/numeric.gen_test.go.tmpl @@ -19,8 +19,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/parse.go b/go/arrow/scalar/parse.go index ba189523fcc01..866e627113d88 100644 --- a/go/arrow/scalar/parse.go +++ b/go/arrow/scalar/parse.go @@ -25,12 +25,12 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" ) type TypeToScalar interface { diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index 7f210c0a580cf..f81465278a1e0 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -26,16 +26,16 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/encoded" - "github.com/apache/arrow/go/v17/arrow/endian" 
- "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/internal/debug" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/encoded" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/scalar/scalar_test.go b/go/arrow/scalar/scalar_test.go index fcb88c0fb7e4f..0775eecedd027 100644 --- a/go/arrow/scalar/scalar_test.go +++ b/go/arrow/scalar/scalar_test.go @@ -25,12 +25,12 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/scalar" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/scalar/temporal.go b/go/arrow/scalar/temporal.go index 718a63b6b225b..ee43f1b629c1d 100644 --- a/go/arrow/scalar/temporal.go +++ b/go/arrow/scalar/temporal.go @@ -22,7 +22,7 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func temporalToString(s TemporalScalar) string { diff --git a/go/arrow/schema.go b/go/arrow/schema.go index bdfee7f4325e9..fd6c3cf1f4025 100644 --- a/go/arrow/schema.go +++ b/go/arrow/schema.go @@ -21,7 +21,7 @@ import ( "sort" "strings" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) type Metadata struct { diff --git a/go/arrow/schema_test.go b/go/arrow/schema_test.go index 3d26a769e98b5..ccdd8a02c9c2b 100644 --- a/go/arrow/schema_test.go +++ b/go/arrow/schema_test.go @@ -21,7 +21,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/table.go b/go/arrow/table.go index ccf28547c2177..5ad2aa08e9341 100644 --- a/go/arrow/table.go +++ b/go/arrow/table.go @@ -20,7 +20,7 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Table represents a logical sequence of chunked arrays of equal length. It is diff --git a/go/arrow/tensor/numeric.gen.go b/go/arrow/tensor/numeric.gen.go index 51995d9ba147f..81ae6af41b09e 100644 --- a/go/arrow/tensor/numeric.gen.go +++ b/go/arrow/tensor/numeric.gen.go @@ -19,7 +19,7 @@ package tensor import ( - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) // Int8 is an n-dim array of int8s. 
diff --git a/go/arrow/tensor/numeric.gen.go.tmpl b/go/arrow/tensor/numeric.gen.go.tmpl index 17c348fa2b73d..9f30297e1ac32 100644 --- a/go/arrow/tensor/numeric.gen.go.tmpl +++ b/go/arrow/tensor/numeric.gen.go.tmpl @@ -17,8 +17,8 @@ package tensor import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" ) {{range .In}} diff --git a/go/arrow/tensor/numeric.gen_test.go b/go/arrow/tensor/numeric.gen_test.go index 92f4f7015edd2..8039aea39667a 100644 --- a/go/arrow/tensor/numeric.gen_test.go +++ b/go/arrow/tensor/numeric.gen_test.go @@ -23,10 +23,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) func TestTensorInt8(t *testing.T) { diff --git a/go/arrow/tensor/numeric.gen_test.go.tmpl b/go/arrow/tensor/numeric.gen_test.go.tmpl index c080df30e0f36..593be259ce630 100644 --- a/go/arrow/tensor/numeric.gen_test.go.tmpl +++ b/go/arrow/tensor/numeric.gen_test.go.tmpl @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) {{range .In}} diff --git a/go/arrow/tensor/tensor.go b/go/arrow/tensor/tensor.go index b19cc2b916743..067015f3a97d0 100644 --- a/go/arrow/tensor/tensor.go +++ b/go/arrow/tensor/tensor.go @@ -21,8 +21,8 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) // Interface represents an n-dimensional array of numerical data. 
diff --git a/go/arrow/tensor/tensor_test.go b/go/arrow/tensor/tensor_test.go index 5ed420b96bba7..73547e32c3bcb 100644 --- a/go/arrow/tensor/tensor_test.go +++ b/go/arrow/tensor/tensor_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/tensor" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/tensor" ) func TestTensor(t *testing.T) { diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index e8e7cf174ed2d..aae6ad106487f 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -20,9 +20,9 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/type_traits_boolean.go b/go/arrow/type_traits_boolean.go index 9bdc0b824e6f6..74d643ba6206e 100644 --- a/go/arrow/type_traits_boolean.go +++ b/go/arrow/type_traits_boolean.go @@ -17,7 +17,7 @@ package arrow import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) type booleanTraits struct{} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 604c5be3f3670..c93e781d934cb 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/endian" ) // Decimal128 traits diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index de4d931fed748..9ef47c31bdd04 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/endian" ) // Decimal256 traits diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index 46ab2b8fc9ed8..0552932cf9b02 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" ) // Float16 traits diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 48ae7eb376453..94b5274d45968 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -19,8 +19,8 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/internal/debug" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/internal/debug" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 
bf20dc9995a70..84a32a9115805 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -22,7 +22,7 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index 9e5c68a2fc8fd..e74194b5260fc 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -20,7 +20,7 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen_test.go b/go/arrow/type_traits_numeric.gen_test.go index 90324197a3352..3d021575a6654 100644 --- a/go/arrow/type_traits_numeric.gen_test.go +++ b/go/arrow/type_traits_numeric.gen_test.go @@ -22,7 +22,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func TestInt64Traits(t *testing.T) { diff --git a/go/arrow/type_traits_numeric.gen_test.go.tmpl b/go/arrow/type_traits_numeric.gen_test.go.tmpl index 440f240ab30d2..9f7118eb8ec4a 100644 --- a/go/arrow/type_traits_numeric.gen_test.go.tmpl +++ b/go/arrow/type_traits_numeric.gen_test.go.tmpl @@ -20,7 +20,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) {{- range .In}} diff --git a/go/arrow/type_traits_test.go b/go/arrow/type_traits_test.go index ee28b81de09a6..ec653c0b5bbae 100644 --- a/go/arrow/type_traits_test.go +++ b/go/arrow/type_traits_test.go @@ -22,10 +22,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/float16" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/float16" ) func TestBooleanTraits(t *testing.T) { diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index 09ef09d8ea188..e506b6f473d80 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -19,7 +19,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var TimestampTraits timestampTraits diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index 36c83f011251e..ef35bc0d7ec55 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -19,7 +19,7 @@ package arrow import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/endian" ) var ViewHeaderTraits viewHeaderTraits diff --git a/go/arrow/util/byte_size.go b/go/arrow/util/byte_size.go index 37b47886f2652..bb163fcef5fc0 100644 --- a/go/arrow/util/byte_size.go +++ b/go/arrow/util/byte_size.go @@ -17,9 +17,9 @@ package util import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" ) func isArrayDataNil(arrayData arrow.ArrayData) bool { diff --git a/go/arrow/util/byte_size_test.go b/go/arrow/util/byte_size_test.go index 6fbbe9dce094b..ff6d8ea7edf0c 100644 --- 
a/go/arrow/util/byte_size_test.go +++ b/go/arrow/util/byte_size_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/util" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/util" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/util/protobuf_reflect.go b/go/arrow/util/protobuf_reflect.go index b4c8d68db8b0d..03153563b8cb5 100644 --- a/go/arrow/util/protobuf_reflect.go +++ b/go/arrow/util/protobuf_reflect.go @@ -20,9 +20,9 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" "github.com/huandu/xstrings" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protoreflect" diff --git a/go/arrow/util/protobuf_reflect_test.go b/go/arrow/util/protobuf_reflect_test.go index ab3cbdf9a6b13..220552df8d89e 100644 --- a/go/arrow/util/protobuf_reflect_test.go +++ b/go/arrow/util/protobuf_reflect_test.go @@ -20,10 +20,10 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/arrow/util/util_message" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/arrow/util/util_message" "github.com/huandu/xstrings" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/go.mod b/go/go.mod index a5e359741c26c..1c730cc87709b 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-module github.com/apache/arrow/go/v17 +module github.com/apache/arrow/go/v18 go 1.21 diff --git a/go/internal/bitutils/bit_block_counter.go b/go/internal/bitutils/bit_block_counter.go index 677c497c14d23..89e50b2dc6b1d 100644 --- a/go/internal/bitutils/bit_block_counter.go +++ b/go/internal/bitutils/bit_block_counter.go @@ -21,8 +21,8 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) func loadWord(byt []byte) uint64 { diff --git a/go/internal/bitutils/bit_block_counter_test.go b/go/internal/bitutils/bit_block_counter_test.go index 7ddd9ca343624..064d4b46b452f 100644 --- a/go/internal/bitutils/bit_block_counter_test.go +++ b/go/internal/bitutils/bit_block_counter_test.go @@ -19,9 +19,9 @@ package bitutils_test import ( "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" "github.com/stretchr/testify/assert" "golang.org/x/exp/rand" ) diff --git a/go/internal/bitutils/bit_run_reader.go b/go/internal/bitutils/bit_run_reader.go index aaba0bed8eb19..cce6792a6d0c8 100644 --- a/go/internal/bitutils/bit_run_reader.go +++ b/go/internal/bitutils/bit_run_reader.go @@ -22,9 +22,9 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) // BitRun represents a run of bits with the same value of length Len diff --git a/go/internal/bitutils/bit_run_reader_test.go b/go/internal/bitutils/bit_run_reader_test.go index 528fef09c313b..e3e53c92621cd 100644 --- a/go/internal/bitutils/bit_run_reader_test.go +++ b/go/internal/bitutils/bit_run_reader_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/internal/bitutils" "github.com/stretchr/testify/assert" ) diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index f84d7d975dba4..2c6a39f5352e6 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) // IsMultipleOf64 returns whether v is a multiple of 64. 
diff --git a/go/internal/bitutils/bit_set_run_reader_test.go b/go/internal/bitutils/bit_set_run_reader_test.go index 322906804cae8..c42f8b0d6dce7 100644 --- a/go/internal/bitutils/bit_set_run_reader_test.go +++ b/go/internal/bitutils/bit_set_run_reader_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/internal/bitutils/bitmap_generate.go b/go/internal/bitutils/bitmap_generate.go index 1871b9570092e..a6d43b4622f93 100644 --- a/go/internal/bitutils/bitmap_generate.go +++ b/go/internal/bitutils/bitmap_generate.go @@ -16,7 +16,7 @@ package bitutils -import "github.com/apache/arrow/go/v17/arrow/bitutil" +import "github.com/apache/arrow/go/v18/arrow/bitutil" // GenerateBits writes sequential bits to a bitmap. Bits preceding the // initial start offset are preserved, bits following the bitmap may diff --git a/go/internal/bitutils/bitmap_generate_test.go b/go/internal/bitutils/bitmap_generate_test.go index 1367fe773c00d..d75f5a72147b1 100644 --- a/go/internal/bitutils/bitmap_generate_test.go +++ b/go/internal/bitutils/bitmap_generate_test.go @@ -19,7 +19,7 @@ package bitutils_test import ( "testing" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/bitutils" "golang.org/x/exp/rand" ) diff --git a/go/internal/hashing/xxh3_memo_table.gen.go b/go/internal/hashing/xxh3_memo_table.gen.go index 47626d317f10e..c8f56ed21565e 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go +++ b/go/internal/hashing/xxh3_memo_table.gen.go @@ -21,9 +21,9 @@ package hashing import ( "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) type payloadInt8 struct { diff --git a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl index 34fd25246f6a4..b852a9d79a39b 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl +++ b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl @@ -17,8 +17,8 @@ package hashing import ( - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/utils" ) {{range .In}} diff --git a/go/internal/types/extension_types.go b/go/internal/types/extension_types.go index e0c9c4c185c9d..3c63b36874600 100644 --- a/go/internal/types/extension_types.go +++ b/go/internal/types/extension_types.go @@ -24,9 +24,9 @@ import ( "reflect" "strings" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/json" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/json" "github.com/google/uuid" "golang.org/x/xerrors" ) diff --git a/go/internal/types/extension_types_test.go b/go/internal/types/extension_types_test.go index 632375c2685c0..50abaae3a9e06 100644 --- a/go/internal/types/extension_types_test.go +++ b/go/internal/types/extension_types_test.go @@ 
-20,11 +20,11 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/internal/utils/transpose_ints_def.go b/go/internal/utils/transpose_ints_def.go index 83f0a256a774f..2095b3d08c54b 100644 --- a/go/internal/utils/transpose_ints_def.go +++ b/go/internal/utils/transpose_ints_def.go @@ -19,7 +19,7 @@ package utils import ( "errors" - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) //go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=avx2 transpose_ints_simd.go.tmpl=transpose_ints_avx2_amd64.go diff --git a/go/internal/utils/transpose_ints_test.go b/go/internal/utils/transpose_ints_test.go index c26782086802c..427a1ad041c55 100644 --- a/go/internal/utils/transpose_ints_test.go +++ b/go/internal/utils/transpose_ints_test.go @@ -22,7 +22,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/internal/utils" ) var ( diff --git a/go/parquet/cmd/parquet_reader/dumper.go b/go/parquet/cmd/parquet_reader/dumper.go index a7504fdd4e849..bab9939bfd243 100644 --- a/go/parquet/cmd/parquet_reader/dumper.go +++ b/go/parquet/cmd/parquet_reader/dumper.go @@ -22,9 +22,9 @@ import ( "reflect" "time" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" ) const defaultBatchSize = 128 diff --git a/go/parquet/cmd/parquet_reader/main.go b/go/parquet/cmd/parquet_reader/main.go index d0577868e8670..6e04f4254f9fa 100644 --- a/go/parquet/cmd/parquet_reader/main.go +++ b/go/parquet/cmd/parquet_reader/main.go @@ -25,11 +25,11 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/docopt/docopt-go" ) diff --git a/go/parquet/cmd/parquet_schema/main.go b/go/parquet/cmd/parquet_schema/main.go index d0ff87feb5a56..0cd0700e4e41e 100644 --- a/go/parquet/cmd/parquet_schema/main.go +++ b/go/parquet/cmd/parquet_schema/main.go @@ -20,8 +20,8 @@ import ( "fmt" "os" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/docopt/docopt-go" ) diff --git a/go/parquet/compress/brotli.go b/go/parquet/compress/brotli.go index e7574d2b1a779..9d3a22c9e58e8 100644 --- a/go/parquet/compress/brotli.go +++ 
b/go/parquet/compress/brotli.go @@ -21,7 +21,7 @@ import ( "io" "github.com/andybalholm/brotli" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/debug" ) type brotliCodec struct{} diff --git a/go/parquet/compress/compress.go b/go/parquet/compress/compress.go index 83b174e449c9f..b6a1349133e84 100644 --- a/go/parquet/compress/compress.go +++ b/go/parquet/compress/compress.go @@ -23,7 +23,7 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Compression is an alias to the thrift compression codec enum type for easy use diff --git a/go/parquet/compress/compress_test.go b/go/parquet/compress/compress_test.go index 2aaec95bf2eee..843062c0d024a 100644 --- a/go/parquet/compress/compress_test.go +++ b/go/parquet/compress/compress_test.go @@ -22,7 +22,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/compress/zstd.go b/go/parquet/compress/zstd.go index e81d364fceeb4..ea2126be18cc9 100644 --- a/go/parquet/compress/zstd.go +++ b/go/parquet/compress/zstd.go @@ -20,7 +20,7 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/debug" "github.com/klauspost/compress/zstd" ) diff --git a/go/parquet/doc.go b/go/parquet/doc.go index c580b8e317a67..a4fdd6e5dda21 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -30,9 +30,9 @@ // // You can download the library and cli utilities via: // -// go get -u github.com/apache/arrow/go/v17/parquet -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest +// go get -u github.com/apache/arrow/go/v18/parquet +// go install github.com/apache/arrow/go/v18/parquet/cmd/parquet_reader@latest +// go install github.com/apache/arrow/go/v18/parquet/cmd/parquet_schema@latest // // # Modules // diff --git a/go/parquet/encryption_properties.go b/go/parquet/encryption_properties.go index f29bf80da9b75..e9cb07d18bfe4 100644 --- a/go/parquet/encryption_properties.go +++ b/go/parquet/encryption_properties.go @@ -20,7 +20,7 @@ import ( "crypto/rand" "unicode/utf8" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Constants that will be used as the default values with encryption/decryption diff --git a/go/parquet/encryption_properties_test.go b/go/parquet/encryption_properties_test.go index 0519b7a9db96b..8f50e5880b555 100644 --- a/go/parquet/encryption_properties_test.go +++ b/go/parquet/encryption_properties_test.go @@ -19,8 +19,8 @@ package parquet_test import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/encryption_read_config_test.go b/go/parquet/encryption_read_config_test.go index e78dffc641905..1e2de16416d31 100644 --- a/go/parquet/encryption_read_config_test.go +++ b/go/parquet/encryption_read_config_test.go @@ -23,10 +23,10 @@ import ( "path" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - 
"github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/encryption_write_config_test.go b/go/parquet/encryption_write_config_test.go index 6b5c171e6ea0d..01a5c2be93a46 100644 --- a/go/parquet/encryption_write_config_test.go +++ b/go/parquet/encryption_write_config_test.go @@ -23,10 +23,10 @@ import ( "path/filepath" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index 74a1b4486a703..38ebcf2893c46 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -21,13 +21,13 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index 7d8c600138e06..b744c561d3b96 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -24,12 +24,12 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/file/column_reader_types.gen.go b/go/parquet/file/column_reader_types.gen.go index 1e7837e5679fb..e0d0afac38ac2 100644 --- a/go/parquet/file/column_reader_types.gen.go +++ b/go/parquet/file/column_reader_types.gen.go @@ -21,9 +21,9 @@ package file import ( "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + 
"github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) // Int32ColumnChunkReader is the Typed Column chunk reader instance for reading diff --git a/go/parquet/file/column_reader_types.gen.go.tmpl b/go/parquet/file/column_reader_types.gen.go.tmpl index a1cccbad16e37..b6056836d76f4 100644 --- a/go/parquet/file/column_reader_types.gen.go.tmpl +++ b/go/parquet/file/column_reader_types.gen.go.tmpl @@ -17,8 +17,8 @@ package file import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) {{range .In}} diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 520622f0da6c6..bbf30e03087d5 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -21,14 +21,14 @@ import ( "encoding/binary" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=../internal/encoding/physical_types.tmpldata column_writer_types.gen.go.tmpl diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index cd2408f4fba5d..009c8c8bc51fd 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -24,22 +24,22 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - arrutils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + arrutils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + 
"github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" diff --git a/go/parquet/file/column_writer_types.gen.go b/go/parquet/file/column_writer_types.gen.go index d6c0e8142ab1b..612b4095098a1 100644 --- a/go/parquet/file/column_writer_types.gen.go +++ b/go/parquet/file/column_writer_types.gen.go @@ -19,14 +19,14 @@ package file import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) // Int32ColumnChunkWriter is the typed interface for writing columns to a parquet diff --git a/go/parquet/file/column_writer_types.gen.go.tmpl b/go/parquet/file/column_writer_types.gen.go.tmpl index 6fbd3d4ffde15..cb48dd64ceedc 100644 --- a/go/parquet/file/column_writer_types.gen.go.tmpl +++ b/go/parquet/file/column_writer_types.gen.go.tmpl @@ -19,11 +19,11 @@ package file import ( "fmt" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) {{range .In}} diff --git a/go/parquet/file/file_reader.go b/go/parquet/file/file_reader.go index 145c09bb5fcd4..f838482fbb0e9 100644 --- a/go/parquet/file/file_reader.go +++ b/go/parquet/file/file_reader.go @@ -25,10 +25,10 @@ import ( "runtime" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/file_reader_mmap.go b/go/parquet/file/file_reader_mmap.go index c3e487b536f73..77afb6b639bff 100644 --- a/go/parquet/file/file_reader_mmap.go +++ b/go/parquet/file/file_reader_mmap.go @@ -22,7 +22,7 @@ package file import ( "io" - 
"github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/exp/mmap" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/file_reader_mmap_windows.go b/go/parquet/file/file_reader_mmap_windows.go index 776eb98d5d282..87aaafd9e7d81 100644 --- a/go/parquet/file/file_reader_mmap_windows.go +++ b/go/parquet/file/file_reader_mmap_windows.go @@ -22,7 +22,7 @@ package file import ( "errors" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) func mmapOpen(filename string) (parquet.ReaderAtSeeker, error) { diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index d4faf26086f93..7d20bbe1006f8 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -26,16 +26,16 @@ import ( "path" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" libthrift "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index ce5e13c24d05a..6fb64f3b8c315 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -21,11 +21,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) // Writer is the primary interface for writing a parquet file diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index e5ad1b07e25de..0faf3f7233bd3 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -22,13 +22,13 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + 
"github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index 9ab92bc74167e..29aa613de0db6 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -22,11 +22,11 @@ import ( "math/bits" "unsafe" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/level_conversion_test.go b/go/parquet/file/level_conversion_test.go index 34a107163a197..740c0e674469b 100644 --- a/go/parquet/file/level_conversion_test.go +++ b/go/parquet/file/level_conversion_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/file/page_reader.go b/go/parquet/file/page_reader.go index 3c166253cdabe..91dcc3c66aa5d 100644 --- a/go/parquet/file/page_reader.go +++ b/go/parquet/file/page_reader.go @@ -23,13 +23,13 @@ import ( "sync" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/page_writer.go b/go/parquet/file/page_writer.go index 1c23917fe202c..82cd37e1a7774 100644 --- a/go/parquet/file/page_writer.go +++ b/go/parquet/file/page_writer.go @@ -20,15 +20,15 @@ import ( "bytes" "sync" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - 
"github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" libthrift "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/record_reader.go b/go/parquet/file/record_reader.go index 1a1310195a88a..667ffca77a8d1 100755 --- a/go/parquet/file/record_reader.go +++ b/go/parquet/file/record_reader.go @@ -23,14 +23,14 @@ import ( "unsafe" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_reader.go b/go/parquet/file/row_group_reader.go index 1e8444e1fc1d0..25ca6d87d895f 100644 --- a/go/parquet/file/row_group_reader.go +++ b/go/parquet/file/row_group_reader.go @@ -20,10 +20,10 @@ import ( "fmt" "sync" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_writer.go b/go/parquet/file/row_group_writer.go index dfb10d584cce6..d18ff270939d2 100644 --- a/go/parquet/file/row_group_writer.go +++ b/go/parquet/file/row_group_writer.go @@ -17,10 +17,10 @@ package file import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_writer_test.go b/go/parquet/file/row_group_writer_test.go index 2fdba06381f55..0074611235245 100644 --- a/go/parquet/file/row_group_writer_test.go +++ b/go/parquet/file/row_group_writer_test.go @@ -20,10 +20,10 @@ import ( "bytes" "testing" - 
"github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/bmi/bmi_test.go b/go/parquet/internal/bmi/bmi_test.go index 2b7cc59000ac1..41a74ba3afcc3 100644 --- a/go/parquet/internal/bmi/bmi_test.go +++ b/go/parquet/internal/bmi/bmi_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/parquet/internal/bmi" + "github.com/apache/arrow/go/v18/parquet/internal/bmi" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index d21fb3dd56603..772fe96fde8f0 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -23,10 +23,10 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // PlainBooleanDecoder is for the Plain Encoding type, there is no diff --git a/go/parquet/internal/encoding/boolean_encoder.go b/go/parquet/internal/encoding/boolean_encoder.go index f77ae05fe30ef..b95707cb2b1da 100644 --- a/go/parquet/internal/encoding/boolean_encoder.go +++ b/go/parquet/internal/encoding/boolean_encoder.go @@ -19,10 +19,10 @@ package encoding import ( "encoding/binary" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) const ( diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 12aaed110b0bc..6a87e5f3b9832 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -19,12 +19,12 @@ package encoding import ( "encoding/binary" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - pqutils "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + pqutils "github.com/apache/arrow/go/v18/parquet/internal/utils" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/byte_array_encoder.go b/go/parquet/internal/encoding/byte_array_encoder.go index fe6b9b147b0bd..518c2e7f7324e 
100644 --- a/go/parquet/internal/encoding/byte_array_encoder.go +++ b/go/parquet/internal/encoding/byte_array_encoder.go @@ -21,11 +21,11 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" ) // PlainByteArrayEncoder encodes byte arrays according to the spec for Plain encoding diff --git a/go/parquet/internal/encoding/byte_stream_split.go b/go/parquet/internal/encoding/byte_stream_split.go index 3772aa876173f..e5fe91ada6d77 100644 --- a/go/parquet/internal/encoding/byte_stream_split.go +++ b/go/parquet/internal/encoding/byte_stream_split.go @@ -20,10 +20,10 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index 71bfc872f133c..12a670198afa6 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -20,16 +20,16 @@ import ( "bytes" "reflect" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index db42998818b39..ca1ed14511f43 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -23,11 +23,11 @@ import ( "math/bits" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + 
"github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // see the deltaBitPack encoder for a description of the encoding format that is diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index d8c9fb92c65ef..e7990f0dacbe8 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go index c2e4e6849396e..ec344cbecf845 100644 --- a/go/parquet/internal/encoding/delta_byte_array_test.go +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -20,8 +20,8 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index eb7a74ecdf117..b72960fe438ad 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/encoder.go b/go/parquet/internal/encoding/encoder.go index 74a6d8dac6566..2373449370f23 100644 --- a/go/parquet/internal/encoding/encoder.go +++ b/go/parquet/internal/encoding/encoder.go @@ -21,14 +21,14 @@ import ( "math/bits" "reflect" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl diff --git a/go/parquet/internal/encoding/encoding_benchmarks_test.go b/go/parquet/internal/encoding/encoding_benchmarks_test.go index 6bf0aec0d2035..95c0b3861bc05 100644 --- a/go/parquet/internal/encoding/encoding_benchmarks_test.go +++ b/go/parquet/internal/encoding/encoding_benchmarks_test.go @@ 
-21,14 +21,14 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" ) const ( diff --git a/go/parquet/internal/encoding/encoding_test.go b/go/parquet/internal/encoding/encoding_test.go index 5e95ab16e2676..4d681eaf02307 100644 --- a/go/parquet/internal/encoding/encoding_test.go +++ b/go/parquet/internal/encoding/encoding_test.go @@ -26,13 +26,13 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index e264697a8c547..7e319845a8089 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -20,8 +20,8 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go index a93164e305fdf..9e6377db868f1 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go @@ -19,9 +19,9 @@ package encoding import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" ) // PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array diff --git a/go/parquet/internal/encoding/levels.go b/go/parquet/internal/encoding/levels.go index d7ee657b34f6c..81c9011c78e1e 100644 --- a/go/parquet/internal/encoding/levels.go +++ b/go/parquet/internal/encoding/levels.go @@ -24,11 +24,11 @@ import ( "math/bits" 
"github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/utils" ) // LevelEncoder is for handling the encoding of Definition and Repetition levels diff --git a/go/parquet/internal/encoding/levels_test.go b/go/parquet/internal/encoding/levels_test.go index cce2cbe1ee495..1990df90a0195 100644 --- a/go/parquet/internal/encoding/levels_test.go +++ b/go/parquet/internal/encoding/levels_test.go @@ -21,11 +21,11 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/memo_table.go b/go/parquet/internal/encoding/memo_table.go index 7cf073cf910f6..117ca85346d57 100644 --- a/go/parquet/internal/encoding/memo_table.go +++ b/go/parquet/internal/encoding/memo_table.go @@ -20,11 +20,11 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata memo_table_types.gen.go.tmpl diff --git a/go/parquet/internal/encoding/memo_table_test.go b/go/parquet/internal/encoding/memo_table_test.go index 9032872502871..904502cafc193 100644 --- a/go/parquet/internal/encoding/memo_table_test.go +++ b/go/parquet/internal/encoding/memo_table_test.go @@ -20,11 +20,11 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/hashing" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/hashing" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go b/go/parquet/internal/encoding/memo_table_types.gen.go index 6d8d86d780fae..1169afc455ff5 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go +++ b/go/parquet/internal/encoding/memo_table_types.gen.go @@ -19,8 +19,8 @@ package encoding import ( - 
"github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl index 3912c3eeaa87b..9708b0b97e527 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl @@ -17,7 +17,7 @@ package encoding import ( - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index 34e8bf540ce06..b651fe2d71864 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -24,11 +24,11 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index bc5cebd698188..1f2bc047464ea 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -20,10 +20,10 @@ import ( "encoding/binary" "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/internal/bitutils" ) var ( diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 663c1164c565e..3a960e2c62332 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -22,15 +22,15 @@ import ( "fmt" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + 
"github.com/apache/arrow/go/v18/parquet/internal/utils" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index ebd7733135a52..079c1aad6bd3f 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -17,13 +17,13 @@ package encoding import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/internal/bitutils" ) // fully typed encoder interfaces to enable writing against encoder/decoders diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 6962c95d4f818..fb81ba8729cca 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -20,11 +20,11 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encryption/aes.go b/go/parquet/internal/encryption/aes.go index c613e4a02e3a8..1e861ffd2a1d0 100644 --- a/go/parquet/internal/encryption/aes.go +++ b/go/parquet/internal/encryption/aes.go @@ -29,7 +29,7 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) // important constants for handling the aes encryption diff --git a/go/parquet/internal/encryption/decryptor.go b/go/parquet/internal/encryption/decryptor.go index 6a28f6ed7b234..6af9a4aacfe15 100644 --- a/go/parquet/internal/encryption/decryptor.go +++ b/go/parquet/internal/encryption/decryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // FileDecryptor is an interface used by the filereader for decrypting an diff --git a/go/parquet/internal/encryption/encryptor.go b/go/parquet/internal/encryption/encryptor.go index fd2860ef323c4..57ff0a4173cdf 100644 --- a/go/parquet/internal/encryption/encryptor.go +++ b/go/parquet/internal/encryption/encryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" ) // FileEncryptor is the interface for constructing 
encryptors for the different diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index cd1437638c485..e3b8ffccb341c 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/mock" ) diff --git a/go/parquet/internal/testutils/primitive_typed.go b/go/parquet/internal/testutils/primitive_typed.go index 55366dc861352..d97677c54d727 100644 --- a/go/parquet/internal/testutils/primitive_typed.go +++ b/go/parquet/internal/testutils/primitive_typed.go @@ -20,11 +20,11 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" ) type PrimitiveTypedTest struct { diff --git a/go/parquet/internal/testutils/random.go b/go/parquet/internal/testutils/random.go index 568d1a6b5f1df..1f4b1b7068beb 100644 --- a/go/parquet/internal/testutils/random.go +++ b/go/parquet/internal/testutils/random.go @@ -24,14 +24,14 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/endian" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/endian" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" diff --git a/go/parquet/internal/testutils/random_arrow.go b/go/parquet/internal/testutils/random_arrow.go index fe52932e51576..f9a199de77963 100644 --- a/go/parquet/internal/testutils/random_arrow.go +++ b/go/parquet/internal/testutils/random_arrow.go @@ -17,10 +17,10 @@ package testutils import ( - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + 
"github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" "golang.org/x/exp/rand" ) diff --git a/go/parquet/internal/testutils/utils.go b/go/parquet/internal/testutils/utils.go index 057a055884561..823f7fbd07d1d 100644 --- a/go/parquet/internal/testutils/utils.go +++ b/go/parquet/internal/testutils/utils.go @@ -19,7 +19,7 @@ package testutils import ( "reflect" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" ) var typeToParquetTypeMap = map[reflect.Type]parquet.Type{ diff --git a/go/parquet/internal/thrift/helpers.go b/go/parquet/internal/thrift/helpers.go index e2600763dbf3c..f8b0f2170c45d 100644 --- a/go/parquet/internal/thrift/helpers.go +++ b/go/parquet/internal/thrift/helpers.go @@ -23,7 +23,7 @@ import ( "context" "io" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" "github.com/apache/thrift/lib/go/thrift" ) diff --git a/go/parquet/internal/utils/bit_benchmark_test.go b/go/parquet/internal/utils/bit_benchmark_test.go index 2227c22d3b4e5..d171e81e952fa 100644 --- a/go/parquet/internal/utils/bit_benchmark_test.go +++ b/go/parquet/internal/utils/bit_benchmark_test.go @@ -20,9 +20,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" ) type linearBitRunReader struct { diff --git a/go/parquet/internal/utils/bit_packing_arm64.go b/go/parquet/internal/utils/bit_packing_arm64.go index a8d3a996d8c46..89a00b0c63b9d 100644 --- a/go/parquet/internal/utils/bit_packing_arm64.go +++ b/go/parquet/internal/utils/bit_packing_arm64.go @@ -23,7 +23,7 @@ import ( "github.com/klauspost/cpuid/v2" // import for side effect of initializing feature flags // based on ARM_ENABLE_EXT env var - _ "github.com/apache/arrow/go/v17/parquet/internal/bmi" + _ "github.com/apache/arrow/go/v18/parquet/internal/bmi" ) func init() { diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index d66968047adc8..2343b5500242c 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -24,10 +24,10 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // masks for grabbing the trailing bits based on the number of trailing bits desired diff --git a/go/parquet/internal/utils/bit_reader_test.go b/go/parquet/internal/utils/bit_reader_test.go index 24e4927ff8eca..5bb1c9a70190f 100644 --- a/go/parquet/internal/utils/bit_reader_test.go +++ b/go/parquet/internal/utils/bit_reader_test.go @@ -25,11 +25,11 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "golang.org/x/exp/rand" diff --git a/go/parquet/internal/utils/bit_writer.go b/go/parquet/internal/utils/bit_writer.go index bb7d7a5e0c4fa..ab0cb3ce58445 100644 --- a/go/parquet/internal/utils/bit_writer.go +++ b/go/parquet/internal/utils/bit_writer.go @@ -21,7 +21,7 @@ import ( "io" "log" - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) // WriterAtBuffer is a convenience struct for providing a WriteAt function diff --git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go index 011330bde550f..163e928f4b689 100644 --- a/go/parquet/internal/utils/bitmap_writer.go +++ b/go/parquet/internal/utils/bitmap_writer.go @@ -20,7 +20,7 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v17/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/bitutil" ) // BitmapWriter is an interface for bitmap writers so that we can use multiple diff --git a/go/parquet/internal/utils/bitmap_writer_test.go b/go/parquet/internal/utils/bitmap_writer_test.go index 893b003a8957c..39838e87d3223 100644 --- a/go/parquet/internal/utils/bitmap_writer_test.go +++ b/go/parquet/internal/utils/bitmap_writer_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/parquet/internal/utils" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/parquet/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index affda41ec0ecb..bf24a5822341d 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -24,10 +24,10 @@ import ( "encoding/binary" "math" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index be986b60786ba..80f76ef12d71a 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -19,9 +19,9 @@ package utils import ( - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index 11b393e4a7ab8..992270d8d8e00 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -17,9 +17,9 @@ package utils import ( - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" + 
"github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" ) {{range .In}} diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go index fa54aec347575..887ed79343a42 100644 --- a/go/parquet/metadata/app_version.go +++ b/go/parquet/metadata/app_version.go @@ -21,8 +21,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" ) var ( diff --git a/go/parquet/metadata/column_chunk.go b/go/parquet/metadata/column_chunk.go index 5bc5c049cd592..a05b3c9124154 100644 --- a/go/parquet/metadata/column_chunk.go +++ b/go/parquet/metadata/column_chunk.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/file.go b/go/parquet/metadata/file.go index fde319d0b32e5..2f7fe53303c3f 100644 --- a/go/parquet/metadata/file.go +++ b/go/parquet/metadata/file.go @@ -24,12 +24,12 @@ import ( "reflect" "unicode/utf8" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/thrift" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/thrift" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/metadata_test.go b/go/parquet/metadata/metadata_test.go index a631d8b925bd2..8ecb95cf41e9d 100644 --- a/go/parquet/metadata/metadata_test.go +++ b/go/parquet/metadata/metadata_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/row_group.go b/go/parquet/metadata/row_group.go index c9a55ffdb5b16..5373fb6f783fc 100644 --- a/go/parquet/metadata/row_group.go +++ b/go/parquet/metadata/row_group.go @@ -20,10 +20,10 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v17/parquet" - 
"github.com/apache/arrow/go/v17/parquet/internal/encryption" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encryption" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" ) // RowGroupMetaData is a proxy around the thrift RowGroup meta data object diff --git a/go/parquet/metadata/stat_compare_test.go b/go/parquet/metadata/stat_compare_test.go index f759ee9fe1734..dafbf3ed04f1a 100644 --- a/go/parquet/metadata/stat_compare_test.go +++ b/go/parquet/metadata/stat_compare_test.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/statistics.go b/go/parquet/metadata/statistics.go index 604fa50ee3b07..e7ffc2a096370 100644 --- a/go/parquet/metadata/statistics.go +++ b/go/parquet/metadata/statistics.go @@ -22,15 +22,15 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=statistics_types.tmpldata statistics_types.gen.go.tmpl diff --git a/go/parquet/metadata/statistics_test.go b/go/parquet/metadata/statistics_test.go index 9760c0ee7d2fb..913629959e928 100644 --- a/go/parquet/metadata/statistics_test.go +++ b/go/parquet/metadata/statistics_test.go @@ -21,12 +21,12 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/metadata/statistics_types.gen.go b/go/parquet/metadata/statistics_types.gen.go index a8670e221b706..0c383fc7f5414 100644 --- a/go/parquet/metadata/statistics_types.gen.go +++ b/go/parquet/metadata/statistics_types.gen.go @@ -22,15 +22,15 @@ import ( "fmt" "math" - 
"github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/schema" ) type minmaxPairInt32 [2]int32 diff --git a/go/parquet/metadata/statistics_types.gen.go.tmpl b/go/parquet/metadata/statistics_types.gen.go.tmpl index 4cf47d1915f83..4b3c2a7158ac8 100644 --- a/go/parquet/metadata/statistics_types.gen.go.tmpl +++ b/go/parquet/metadata/statistics_types.gen.go.tmpl @@ -19,13 +19,13 @@ package metadata import ( "fmt" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" - "github.com/apache/arrow/go/v17/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/internal/bitutils" ) {{range .In}} diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 661d163213394..1e5d5958e6732 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -26,16 +26,16 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/sync/errgroup" ) diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go index 9f5c870fa7875..5526c98d7872c 100644 --- a/go/parquet/pqarrow/encode_arrow.go +++ b/go/parquet/pqarrow/encode_arrow.go @@ -25,16 +25,16 @@ import ( "time" "unsafe" - 
"github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/debug" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/debug" ) // get the count of the number of leaf arrays for the type diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 4f955eae4e133..9b3419988d6df 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -25,22 +25,22 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/bitutil" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/decimal256" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/bitutil" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/decimal256" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/parquet/pqarrow/encode_dict_compute.go b/go/parquet/pqarrow/encode_dict_compute.go index c698cdaf91fb1..647bb69db78d5 100644 --- a/go/parquet/pqarrow/encode_dict_compute.go +++ b/go/parquet/pqarrow/encode_dict_compute.go @@ -21,14 +21,14 @@ package pqarrow import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - 
"github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) func isDictEncoding(enc parquet.Encoding) bool { diff --git a/go/parquet/pqarrow/encode_dict_nocompute.go b/go/parquet/pqarrow/encode_dict_nocompute.go index 26efa87538b66..aa405a90e8a12 100644 --- a/go/parquet/pqarrow/encode_dict_nocompute.go +++ b/go/parquet/pqarrow/encode_dict_nocompute.go @@ -21,8 +21,8 @@ package pqarrow import ( "errors" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/parquet/file" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/parquet/file" ) func writeDictionaryArrow(*arrowWriteContext, file.ColumnChunkWriter, arrow.Array, []int16, []int16, bool) (err error) { diff --git a/go/parquet/pqarrow/encode_dictionary_test.go b/go/parquet/pqarrow/encode_dictionary_test.go index 200b81b3fec86..cacdc7e39cab3 100644 --- a/go/parquet/pqarrow/encode_dictionary_test.go +++ b/go/parquet/pqarrow/encode_dictionary_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/internal/testutils" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/compute" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/internal/testutils" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/parquet/pqarrow/file_reader.go b/go/parquet/pqarrow/file_reader.go index 3f958dab6f1ba..208ac9ceebadf 100755 --- a/go/parquet/pqarrow/file_reader.go +++ b/go/parquet/pqarrow/file_reader.go @@ -23,13 +23,13 @@ import ( "sync" "sync/atomic" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/arrio" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/arrio" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/file_reader_test.go b/go/parquet/pqarrow/file_reader_test.go index d7f03ac0531b7..b7d178f8644de 100644 --- a/go/parquet/pqarrow/file_reader_test.go +++ 
b/go/parquet/pqarrow/file_reader_test.go @@ -26,14 +26,14 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index b1d266ff29080..891b757f5eb51 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -22,12 +22,12 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go index fc965279a928d..25ef3879e7811 100644 --- a/go/parquet/pqarrow/file_writer_test.go +++ b/go/parquet/pqarrow/file_writer_test.go @@ -22,11 +22,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/pqarrow/helpers.go b/go/parquet/pqarrow/helpers.go index a9a4242fdb44c..800cd84192005 100644 --- a/go/parquet/pqarrow/helpers.go +++ b/go/parquet/pqarrow/helpers.go @@ -17,7 +17,7 @@ package pqarrow import ( - "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v18/arrow" ) func releaseArrays(arrays []arrow.Array) { diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go index c28072afe2c24..ff439d59a6e75 100644 --- a/go/parquet/pqarrow/path_builder.go +++ b/go/parquet/pqarrow/path_builder.go @@ -21,12 +21,12 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/bitutils" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + 
"github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/bitutils" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/path_builder_test.go b/go/parquet/pqarrow/path_builder_test.go index 4b7a8f9094b76..9bbae426b8a46 100644 --- a/go/parquet/pqarrow/path_builder_test.go +++ b/go/parquet/pqarrow/path_builder_test.go @@ -20,10 +20,10 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/go/parquet/pqarrow/properties.go b/go/parquet/pqarrow/properties.go index d3cf4de6ac74f..25a299c86f5f5 100755 --- a/go/parquet/pqarrow/properties.go +++ b/go/parquet/pqarrow/properties.go @@ -19,9 +19,9 @@ package pqarrow import ( "context" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/internal/encoding" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/internal/encoding" ) // ArrowWriterProperties are used to determine how to manipulate the arrow data diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go index c573dbe43a562..31bd0eba84388 100644 --- a/go/parquet/pqarrow/reader_writer_test.go +++ b/go/parquet/pqarrow/reader_writer_test.go @@ -22,12 +22,12 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/pqarrow" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + "github.com/apache/arrow/go/v18/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) diff --git a/go/parquet/pqarrow/schema.go b/go/parquet/pqarrow/schema.go index 8b3ea854b7a8f..ce5cc6f905084 100644 --- a/go/parquet/pqarrow/schema.go +++ b/go/parquet/pqarrow/schema.go @@ -22,15 +22,15 @@ import ( "math" "strconv" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/decimal128" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/file" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/decimal128" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/file" + 
"github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/schema_test.go b/go/parquet/pqarrow/schema_test.go index 3cbcb803fa68f..24b031c174bf2 100644 --- a/go/parquet/pqarrow/schema_test.go +++ b/go/parquet/pqarrow/schema_test.go @@ -20,15 +20,15 @@ import ( "encoding/base64" "testing" - "github.com/apache/arrow/go/v17/arrow" - "github.com/apache/arrow/go/v17/arrow/flight" - "github.com/apache/arrow/go/v17/arrow/ipc" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/types" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/metadata" - "github.com/apache/arrow/go/v17/parquet/pqarrow" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/flight" + "github.com/apache/arrow/go/v18/arrow/ipc" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/types" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/metadata" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/reader_properties.go b/go/parquet/reader_properties.go index 1b7a9ef5dcc17..a9db8efaffb23 100644 --- a/go/parquet/reader_properties.go +++ b/go/parquet/reader_properties.go @@ -21,8 +21,8 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/internal/utils" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/internal/utils" ) // ReaderProperties are used to define how the file reader will handle buffering and allocating buffers diff --git a/go/parquet/reader_writer_properties_test.go b/go/parquet/reader_writer_properties_test.go index 784c644c13590..f07219c6463aa 100644 --- a/go/parquet/reader_writer_properties_test.go +++ b/go/parquet/reader_writer_properties_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/column.go b/go/parquet/schema/column.go index b4c169eff06a4..e3cd9f709cddb 100644 --- a/go/parquet/schema/column.go +++ b/go/parquet/schema/column.go @@ -20,8 +20,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // Column encapsulates the information necessary to interpret primitive diff --git a/go/parquet/schema/converted_types.go b/go/parquet/schema/converted_types.go index 681f96dfe6c88..5fc10f61cebc1 100644 --- a/go/parquet/schema/converted_types.go +++ b/go/parquet/schema/converted_types.go @@ -17,7 +17,7 @@ package schema import ( - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // ConvertedType corresponds to the ConvertedType in the parquet.Thrift, diff --git a/go/parquet/schema/converted_types_test.go 
b/go/parquet/schema/converted_types_test.go index bd15d24c5695c..074bcb823942a 100644 --- a/go/parquet/schema/converted_types_test.go +++ b/go/parquet/schema/converted_types_test.go @@ -19,7 +19,7 @@ package schema_test import ( "testing" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go index ae0bdd6307560..87022442128b1 100644 --- a/go/parquet/schema/helpers.go +++ b/go/parquet/schema/helpers.go @@ -17,7 +17,7 @@ package schema import ( - "github.com/apache/arrow/go/v17/parquet" + "github.com/apache/arrow/go/v18/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go index 8c19070df2789..a9127d155f213 100644 --- a/go/parquet/schema/helpers_test.go +++ b/go/parquet/schema/helpers_test.go @@ -21,8 +21,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go index 94a7eaaf07b5b..e8adce1ca140e 100644 --- a/go/parquet/schema/logical_types.go +++ b/go/parquet/schema/logical_types.go @@ -20,10 +20,10 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/internal/debug" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/internal/debug" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) // DecimalMetadata is a struct for managing scale and precision information between diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go index da70b5a36139e..e33925966e178 100644 --- a/go/parquet/schema/logical_types_test.go +++ b/go/parquet/schema/logical_types_test.go @@ -19,9 +19,9 @@ package schema_test import ( "testing" - "github.com/apache/arrow/go/v17/internal/json" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/internal/json" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/node.go b/go/parquet/schema/node.go index 08eec33019278..c395caf8a26c8 100644 --- a/go/parquet/schema/node.go +++ b/go/parquet/schema/node.go @@ -19,8 +19,8 @@ package schema import ( "fmt" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go index 5b4f6b5e1cb98..0bec9eb599dc8 100644 --- a/go/parquet/schema/reflection.go +++ b/go/parquet/schema/reflection.go @@ -22,10 +22,10 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/internal/utils" - "github.com/apache/arrow/go/v17/parquet" - format 
"github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/internal/utils" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) type taggedInfo struct { diff --git a/go/parquet/schema/reflection_test.go b/go/parquet/schema/reflection_test.go index ca47459611611..6877f33c0169f 100644 --- a/go/parquet/schema/reflection_test.go +++ b/go/parquet/schema/reflection_test.go @@ -22,9 +22,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v17/arrow/float16" - "github.com/apache/arrow/go/v17/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/arrow/float16" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/schema.go b/go/parquet/schema/schema.go index 81dca82ecad00..c8d53e647d6f9 100644 --- a/go/parquet/schema/schema.go +++ b/go/parquet/schema/schema.go @@ -35,8 +35,8 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/schema_element_test.go b/go/parquet/schema/schema_element_test.go index 4f57652be6c9e..7da55ce93abe6 100644 --- a/go/parquet/schema/schema_element_test.go +++ b/go/parquet/schema/schema_element_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_flatten_test.go b/go/parquet/schema/schema_flatten_test.go index a128232d46309..e95d56f4d0617 100644 --- a/go/parquet/schema/schema_flatten_test.go +++ b/go/parquet/schema/schema_flatten_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_test.go b/go/parquet/schema/schema_test.go index f0bd941bcb429..fa6c74492460f 100644 --- a/go/parquet/schema/schema_test.go +++ b/go/parquet/schema/schema_test.go @@ -20,9 +20,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v17/parquet" - format "github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v17/parquet/schema" + "github.com/apache/arrow/go/v18/parquet" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" diff --git a/go/parquet/types.go b/go/parquet/types.go index 71336a7987cd8..ff696c1178e18 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -24,8 +24,8 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v17/arrow" - format 
"github.com/apache/arrow/go/v17/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v18/arrow" + format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet" ) const ( diff --git a/go/parquet/writer_properties.go b/go/parquet/writer_properties.go index 9ff7e5a582215..c38bb3b81fa6d 100644 --- a/go/parquet/writer_properties.go +++ b/go/parquet/writer_properties.go @@ -17,8 +17,8 @@ package parquet import ( - "github.com/apache/arrow/go/v17/arrow/memory" - "github.com/apache/arrow/go/v17/parquet/compress" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet/compress" ) // Constants for default property values used for the default reader, writer and column props. @@ -46,7 +46,7 @@ const ( DefaultStatsEnabled = true // If the stats are larger than 4K the writer will skip writing them out anyways. DefaultMaxStatsSize int64 = 4096 - DefaultCreatedBy = "parquet-go version 17.0.0-SNAPSHOT" + DefaultCreatedBy = "parquet-go version 18.0.0-SNAPSHOT" DefaultRootName = "schema" ) diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 4dac64de1e7ab..cb4adccb76771 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 742dc9a82dc47..875334af4526d 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index b216ad5abeb14..ec8ddbbb780df 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 36e96a8d6ce5d..6971b53638e48 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-algorithm Arrow Algorithms diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 5fafbf38c7cdf..b0fe96d6d5ce0 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -28,7 +28,7 @@ under the License. org.apache.arrow arrow-bom - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT pom Arrow Bill of Materials Arrow Bill of Materials diff --git a/java/c/pom.xml b/java/c/pom.xml index b5a995de1ba2f..52962354047b1 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-c-data diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 561877bd5cd36..79105dbfccda5 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-compression Arrow Compression diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 00e812a4c6ae6..c5c7468ccee84 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-dataset diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 3e0f4f8ce1b7d..c00bba5e6c763 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-core diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index c5612644422e9..97bce0c6ed5e3 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-integration-tests diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 5f7bd63f8f935..4833d30dbc33f 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql-jdbc-core diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 23b2c9c7cbef4..524b9cd4f8aae 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 9c7b5b956d354..9c8c5df07fb78 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-flight - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT flight-sql diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 38495fa356e51..851f44d7bf19e 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-flight diff --git a/java/format/pom.xml b/java/format/pom.xml index f1edfb86253c0..1121930da42d2 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 2a1e83f3e21fc..70bde084b4216 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT org.apache.arrow.gandiva diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index b00c03a014980..77184d35b5ac7 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow.maven.plugins arrow-maven-plugins - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT module-info-compiler-maven-plugin maven-plugin diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 54b7757e8cebf..efa074deec5f5 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -32,7 +32,7 @@ under the License. 
org.apache.arrow.maven.plugins arrow-maven-plugins - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT pom Arrow Maven Plugins diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 729cc512ab111..95ef16aaa1cfe 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-core diff --git a/java/memory/memory-netty-buffer-patch/pom.xml b/java/memory/memory-netty-buffer-patch/pom.xml index 1f645472a3398..2e70279b23b00 100644 --- a/java/memory/memory-netty-buffer-patch/pom.xml +++ b/java/memory/memory-netty-buffer-patch/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-netty-buffer-patch diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index f66899151128a..e29ca3a4d053c 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-netty diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index c1a42e3ff0081..6bba222d89cb3 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory-unsafe diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 9eaad8a467895..0ca357beaa781 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-memory pom diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 83b0a88da063b..0dfc26b469ce2 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-performance jar diff --git a/java/pom.xml b/java/pom.xml index b38e3382d4d24..f3f7432ba6aa1 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,7 +28,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT pom Apache Arrow Java Root POM diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 07a768e24d5e2..b69d24786cb14 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-tools Arrow Tools diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 7541cfd56f5dc..127a16511f01f 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -22,7 +22,7 @@ under the License. 
org.apache.arrow arrow-java-root - 17.0.0-SNAPSHOT + 18.0.0-SNAPSHOT arrow-vector Arrow Vectors diff --git a/js/package.json b/js/package.json index cc8fc6f9b4708..c754f5b39c201 100644 --- a/js/package.json +++ b/js/package.json @@ -121,5 +121,5 @@ "engines": { "node": ">=12.0" }, - "version": "17.0.0-SNAPSHOT" + "version": "18.0.0-SNAPSHOT" } diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 4daca3a5bca8f..91c186a6765a5 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -100,7 +100,7 @@ endfunction() set(CMAKE_CXX_STANDARD 17) -set(MLARROW_VERSION "17.0.0-SNAPSHOT") +set(MLARROW_VERSION "18.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 980a63133c83c..a90dee70584b1 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -21,7 +21,7 @@ cmake_minimum_required(VERSION 3.16) project(pyarrow) -set(PYARROW_VERSION "17.0.0-SNAPSHOT") +set(PYARROW_VERSION "18.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") # Running from a Python sdist tarball diff --git a/python/pyproject.toml b/python/pyproject.toml index 86a90906d02f9..d863bb3e5f0ac 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -83,4 +83,4 @@ root = '..' version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' -fallback_version = '17.0.0a0' +fallback_version = '18.0.0a0' diff --git a/r/DESCRIPTION b/r/DESCRIPTION index bb4470e29037d..ff74c566ffeb3 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 16.1.0.9000 +Version: 17.0.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index c2690e6248dbc..6159f3863c375 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 16.1.0.9000 +# arrow 17.0.0.9000 + +# arrow 17.0.0 * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) * `mutate()` expressions can now include aggregations, such as `x - mean(x)`. (#41350) diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index ceb68d773bdb4..10a233356b684 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -76,7 +76,7 @@ home: [C GLib](https://arrow.apache.org/docs/c_glib)
[C++](https://arrow.apache.org/docs/cpp)
[C#](https://github.com/apache/arrow/blob/main/csharp/README.md)
- [Go](https://pkg.go.dev/github.com/apache/arrow/go/v17)
+ [Go](https://pkg.go.dev/github.com/apache/arrow/go/v18)
[Java](https://arrow.apache.org/docs/java)
[JavaScript](https://arrow.apache.org/docs/js)
[Julia](https://github.com/apache/arrow-julia/blob/main/README.md)
diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 43f0b3fac62a1..e8e26f22b05cd 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "16.1.0.9000 (dev)", + "name": "17.0.0.9000 (dev)", "version": "dev/" }, { - "name": "16.1.0 (release)", + "name": "17.0.0 (release)", "version": "" }, + { + "name": "16.1.0", + "version": "16.1/" + }, { "name": "15.0.2", "version": "15.0/" diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index daf40583ee7d3..1fbbe88bcc343 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index 5091384212023..a9eae43d59b9f 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowDataset - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index 042029b2c40cf..f24688e9cb0f8 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlightSQL - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index 8245a0f12e681..263f35986616b 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlight - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index 1d41ae77dda04..42708a65026ee 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. module Arrow - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index 3d9b4d9d87fa6..4a9f27a7ef649 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index 6d92829c23b31..ba66b2cad16f3 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. 
module Parquet - VERSION = "17.0.0-SNAPSHOT" + VERSION = "18.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") From 55ce352be026c77dd6fd993c735fa58c3b8f4a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 16 Jul 2024 16:08:30 +0200 Subject: [PATCH 077/122] MINOR: [Release] Update .deb/.rpm changelogs for 17.0.0 --- .../linux-packages/apache-arrow-apt-source/debian/changelog | 6 ++++++ .../apache-arrow-release/yum/apache-arrow-release.spec.in | 3 +++ dev/tasks/linux-packages/apache-arrow/debian/changelog | 6 ++++++ dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 4 files changed, 18 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 04aa586dc3c96..279c3cc14a4c8 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (17.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 11 Jul 2024 08:57:21 -0000 + apache-arrow-apt-source (16.1.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index f0eb785dd6bc7..fd8165d748d78 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Thu Jul 11 2024 Raúl Cumplido - 17.0.0-1 +- New upstream release. + * Thu May 09 2024 Raúl Cumplido - 16.1.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 35cc598fe6f87..f59bc9f66233e 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (17.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 11 Jul 2024 08:57:21 -0000 + apache-arrow (16.1.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 8d47407c03522..f588bb3f1ab5a 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -887,6 +887,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Thu Jul 11 2024 Raúl Cumplido - 17.0.0-1 +- New upstream release. + * Thu May 09 2024 Raúl Cumplido - 16.1.0-1 - New upstream release. From c3aad6ad53a3a8b5106acd49c979f902b1b7aab9 Mon Sep 17 00:00:00 2001 From: AlexSpace Date: Tue, 16 Jul 2024 22:21:30 +0800 Subject: [PATCH 078/122] GH-43249: [C++][Parquet] remove useless template parameter of `DeltaLengthByteArrayEncoder` (#43250) ### Rationale for this change see https://github.com/apache/arrow/issues/43249 ### What changes are included in this PR? remove template parameter of DeltaLengthByteArrayEncoder ### Are these changes tested? Covered by existing ### Are there any user-facing changes? No. 
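The refactoring pattern here may be easier to follow outside the diff: a class template whose parameter never appears in the class body can be collapsed into a plain class, with its lone instantiation point updated to match. A minimal sketch under hypothetical names (`LengthPrefixedEncoder` and `MakeEncoder` are illustrative only, not the actual Parquet types):

```cpp
#include <cstddef>
#include <memory>
#include <string_view>

// Before: the parameter T appeared in the template head, but nothing in the
// class body depended on it, and only one instantiation ever existed:
//
//   template <typename T>
//   class LengthPrefixedEncoder { ... };
//
// After: a plain class. Out-of-line member definitions drop their
// `template <typename T>` prefixes the same way.
class LengthPrefixedEncoder {
 public:
  // Accumulates the byte length of each appended value.
  void Put(std::string_view value) { total_bytes_ += value.size(); }
  std::size_t total_bytes() const { return total_bytes_; }

 private:
  std::size_t total_bytes_ = 0;
};

// The single construction site loses its template argument:
//   return std::make_unique<LengthPrefixedEncoder<ByteArrayType>>(...);
std::unique_ptr<LengthPrefixedEncoder> MakeEncoder() {
  return std::make_unique<LengthPrefixedEncoder>();
}
```

The actual patch applies the same mechanical steps to `DeltaLengthByteArrayEncoder`: the `template` heads are deleted, member definitions lose their template qualifiers, and the encoder factory constructs the plain class.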
* GitHub Issue: #43249 Lead-authored-by: N.A Co-authored-by: AlexSpace Signed-off-by: mwish --- cpp/src/parquet/encoding.cc | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index c3f2b79629d9b..16a1e249273f6 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2732,7 +2732,6 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder class DeltaLengthByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder { public: @@ -2783,8 +2782,7 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, DeltaBitPackEncoder length_encoder_; }; -template -void DeltaLengthByteArrayEncoder::Put(const ::arrow::Array& values) { +void DeltaLengthByteArrayEncoder::Put(const ::arrow::Array& values) { AssertBaseBinary(values); if (::arrow::is_binary_like(values.type_id())) { PutBinaryArray(checked_cast(values)); @@ -2793,8 +2791,7 @@ void DeltaLengthByteArrayEncoder::Put(const ::arrow::Array& values) { } } -template -void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { +void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { if (num_values == 0) { return; } @@ -2823,10 +2820,9 @@ void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { } } -template -void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, - const uint8_t* valid_bits, - int64_t valid_bits_offset) { +void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, + const uint8_t* valid_bits, + int64_t valid_bits_offset) { if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); @@ -2839,8 +2835,7 @@ void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, } } -template -std::shared_ptr DeltaLengthByteArrayEncoder::FlushValues() { +std::shared_ptr DeltaLengthByteArrayEncoder::FlushValues() { std::shared_ptr encoded_lengths = length_encoder_.FlushValues(); std::shared_ptr data; @@ -3366,7 +3361,7 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder
prefix_length_encoder_; - DeltaLengthByteArrayEncoder suffix_encoder_; + DeltaLengthByteArrayEncoder suffix_encoder_; std::string last_value_; const ByteArray empty_; std::unique_ptr buffer_; @@ -3934,7 +3929,7 @@ std::unique_ptr MakeEncoder(Type::type type_num, Encoding::type encodin } else if (encoding == Encoding::DELTA_LENGTH_BYTE_ARRAY) { switch (type_num) { case Type::BYTE_ARRAY: - return std::make_unique>(descr, pool); + return std::make_unique(descr, pool); default: throw ParquetException("DELTA_LENGTH_BYTE_ARRAY only supports BYTE_ARRAY"); } From cf5d7c7ec3cf45d82aad2b2ff67b799d078f0619 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Tue, 16 Jul 2024 22:48:21 +0800 Subject: [PATCH 079/122] GH-43202: [C++][Compute] Detect and explicit error for offset overflow in row table (#43226) ### Rationale for this change There are two places for the offset in a row table to possibly overflow: 1) Encoding columns into a row table; 2) Appending to a row table from another row table. They are particularly bad because they can cause silent wrong result for the computation. ### What changes are included in this PR? Detect such overflows in the aforementioned places and report an explicit error when an overflow is detected. ### Are these changes tested? UT included. ### Are there any user-facing changes? User code that used to be fake passing could see an explicit error after this change. **This PR contains a "Critical Fix".** * GitHub Issue: #43202 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/src/arrow/compute/row/encode_internal.cc | 26 ++-- cpp/src/arrow/compute/row/encode_internal.h | 6 +- cpp/src/arrow/compute/row/row_internal.cc | 12 +- cpp/src/arrow/compute/row/row_test.cc | 129 +++++++++++++++++++ 4 files changed, 160 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/row/encode_internal.cc b/cpp/src/arrow/compute/row/encode_internal.cc index 88ab5b81b1e0a..658e0dffcac68 100644 --- a/cpp/src/arrow/compute/row/encode_internal.cc +++ b/cpp/src/arrow/compute/row/encode_internal.cc @@ -17,6 +17,7 @@ #include "arrow/compute/row/encode_internal.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/int_util_overflow.h" namespace arrow { namespace compute { @@ -160,8 +161,8 @@ Status RowTableEncoder::EncodeSelected(RowTableImpl* rows, uint32_t num_selected /*num_extra_bytes_to_append=*/static_cast(0))); // Then populate the offsets of the var-length columns, which will be used as the target // size of the var-length buffers resizing below. - EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, num_selected, - selection); + RETURN_NOT_OK(EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, + num_selected, selection)); // Last AppendEmpty with zero rows and zero extra bytes to resize the var-length buffers // based on the populated offsets. 
RETURN_NOT_OK( @@ -667,12 +668,12 @@ void EncoderOffsets::Decode(uint32_t start_row, uint32_t num_rows, } } -void EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, - const std::vector& cols, - uint32_t num_selected, - const uint16_t* selection) { +Status EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, + const std::vector& cols, + uint32_t num_selected, + const uint16_t* selection) { if (rows->metadata().is_fixed_length) { - return; + return Status::OK(); } uint32_t* row_offsets = rows->mutable_offsets(); @@ -713,9 +714,18 @@ void EncoderOffsets::GetRowOffsetsSelected(RowTableImpl* rows, uint32_t length = row_offsets[i]; length += RowTableMetadata::padding_for_alignment(length, row_alignment); row_offsets[i] = sum; - sum += length; + uint32_t sum_maybe_overflow = 0; + if (ARROW_PREDICT_FALSE( + arrow::internal::AddWithOverflow(sum, length, &sum_maybe_overflow))) { + return Status::Invalid( + "Offset overflow detected in EncoderOffsets::GetRowOffsetsSelected for row ", i, + " of length ", length, " bytes, current length in total is ", sum, " bytes"); + } + sum = sum_maybe_overflow; } row_offsets[num_selected] = sum; + + return Status::OK(); } template diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 2afc150530b9e..0618ddd8e4b96 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -227,9 +227,9 @@ class EncoderBinaryPair { class EncoderOffsets { public: - static void GetRowOffsetsSelected(RowTableImpl* rows, - const std::vector& cols, - uint32_t num_selected, const uint16_t* selection); + static Status GetRowOffsetsSelected(RowTableImpl* rows, + const std::vector& cols, + uint32_t num_selected, const uint16_t* selection); static void EncodeSelected(RowTableImpl* rows, const std::vector& cols, uint32_t num_selected, const uint16_t* selection); diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc index 0d418fdecf488..2365ef5632cce 100644 --- a/cpp/src/arrow/compute/row/row_internal.cc +++ b/cpp/src/arrow/compute/row/row_internal.cc @@ -18,6 +18,7 @@ #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/util/int_util_overflow.h" namespace arrow { namespace compute { @@ -325,14 +326,21 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from, // Varying-length rows auto from_offsets = reinterpret_cast(from.offsets_->data()); auto to_offsets = reinterpret_cast(offsets_->mutable_data()); - // TODO(GH-43202): The following two variables are possibly overflowing. uint32_t total_length = to_offsets[num_rows_]; uint32_t total_length_to_append = 0; for (uint32_t i = 0; i < num_rows_to_append; ++i) { uint16_t row_id = source_row_ids ? 
source_row_ids[i] : i; uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id]; total_length_to_append += length; - to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append; + uint32_t to_offset_maybe_overflow = 0; + if (ARROW_PREDICT_FALSE(arrow::internal::AddWithOverflow( + total_length, total_length_to_append, &to_offset_maybe_overflow))) { + return Status::Invalid( + "Offset overflow detected in RowTableImpl::AppendSelectionFrom for row ", + num_rows_ + i, " of length ", length, " bytes, current length in total is ", + to_offsets[num_rows_ + i], " bytes"); + } + to_offsets[num_rows_ + i + 1] = to_offset_maybe_overflow; } RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append)); diff --git a/cpp/src/arrow/compute/row/row_test.cc b/cpp/src/arrow/compute/row/row_test.cc index 2c1a60dfb231c..679ad519a9ef2 100644 --- a/cpp/src/arrow/compute/row/row_test.cc +++ b/cpp/src/arrow/compute/row/row_test.cc @@ -125,5 +125,134 @@ TEST(RowTableMemoryConsumption, Encode) { } } +// GH-43202: Ensure that when offset overflow happens in encoding the row table, an +// explicit error is raised instead of a silent wrong result. +TEST(RowTableOffsetOverflow, LARGE_MEMORY_TEST(Encode)) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // Use 8 512MB var-length rows (occupies 4GB+) to overflow the offset in the row table. + constexpr int64_t num_rows = 8; + constexpr int64_t length_per_binary = 512 * 1024 * 1024; + constexpr int64_t row_alignment = sizeof(uint32_t); + constexpr int64_t var_length_alignment = sizeof(uint32_t); + + MemoryPool* pool = default_memory_pool(); + + // The column to encode. + std::vector columns; + std::vector values; + ASSERT_OK_AND_ASSIGN( + auto value, ::arrow::gen::Constant( + std::make_shared(std::string(length_per_binary, 'X'))) + ->Generate(1)); + values.push_back(std::move(value)); + ExecBatch batch = ExecBatch(std::move(values), 1); + ASSERT_OK(ColumnArraysFromExecBatch(batch, &columns)); + + // The row table. + std::vector column_metadatas; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + RowTableMetadata table_metadata; + table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment, + var_length_alignment); + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata)); + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, row_alignment, var_length_alignment); + + // The rows to encode. + std::vector row_ids(num_rows, 0); + + // Encoding 7 rows should be fine. + { + row_encoder.PrepareEncodeSelected(0, num_rows - 1, columns); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, static_cast(num_rows - 1), + row_ids.data())); + } + + // Encoding 8 rows should overflow. + { + int64_t length_per_row = table_metadata.fixed_length + length_per_binary; + std::stringstream expected_error_message; + expected_error_message << "Invalid: Offset overflow detected in " + "EncoderOffsets::GetRowOffsetsSelected for row " + << num_rows - 1 << " of length " << length_per_row + << " bytes, current length in total is " + << length_per_row * (num_rows - 1) << " bytes"; + row_encoder.PrepareEncodeSelected(0, num_rows, columns); + ASSERT_RAISES_WITH_MESSAGE( + Invalid, expected_error_message.str(), + row_encoder.EncodeSelected(&row_table, static_cast(num_rows), + row_ids.data())); + } +} + +// GH-43202: Ensure that when offset overflow happens in appending to the row table, an +// explicit error is raised instead of a silent wrong result. 
+TEST(RowTableOffsetOverflow, LARGE_MEMORY_TEST(AppendFrom)) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // Use 8 512MB var-length rows (occupies 4GB+) to overflow the offset in the row table. + constexpr int64_t num_rows = 8; + constexpr int64_t length_per_binary = 512 * 1024 * 1024; + constexpr int64_t num_rows_seed = 1; + constexpr int64_t row_alignment = sizeof(uint32_t); + constexpr int64_t var_length_alignment = sizeof(uint32_t); + + MemoryPool* pool = default_memory_pool(); + + // The column to encode. + std::vector columns; + std::vector values; + ASSERT_OK_AND_ASSIGN( + auto value, ::arrow::gen::Constant( + std::make_shared(std::string(length_per_binary, 'X'))) + ->Generate(num_rows_seed)); + values.push_back(std::move(value)); + ExecBatch batch = ExecBatch(std::move(values), num_rows_seed); + ASSERT_OK(ColumnArraysFromExecBatch(batch, &columns)); + + // The seed row table. + std::vector column_metadatas; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + RowTableMetadata table_metadata; + table_metadata.FromColumnMetadataVector(column_metadatas, row_alignment, + var_length_alignment); + RowTableImpl row_table_seed; + ASSERT_OK(row_table_seed.Init(pool, table_metadata)); + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, row_alignment, var_length_alignment); + row_encoder.PrepareEncodeSelected(0, num_rows_seed, columns); + std::vector row_ids(num_rows_seed, 0); + ASSERT_OK(row_encoder.EncodeSelected( + &row_table_seed, static_cast(num_rows_seed), row_ids.data())); + + // The target row table. + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata)); + + // Appending the seed 7 times should be fine. + for (int i = 0; i < num_rows - 1; ++i) { + ASSERT_OK(row_table.AppendSelectionFrom(row_table_seed, num_rows_seed, + /*source_row_ids=*/NULLPTR)); + } + + // Appending the seed the 8-th time should overflow. + int64_t length_per_row = table_metadata.fixed_length + length_per_binary; + std::stringstream expected_error_message; + expected_error_message + << "Invalid: Offset overflow detected in RowTableImpl::AppendSelectionFrom for row " + << num_rows - 1 << " of length " << length_per_row + << " bytes, current length in total is " << length_per_row * (num_rows - 1) + << " bytes"; + ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_error_message.str(), + row_table.AppendSelectionFrom(row_table_seed, num_rows_seed, + /*source_row_ids=*/NULLPTR)); +} + } // namespace compute } // namespace arrow From 81b54258ca09d80d6e94c7bce89cffc652f70474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 16 Jul 2024 23:20:14 +0200 Subject: [PATCH 080/122] GH-43282: [Release][Docs][Packaging] Upload correct docs job when uploading binaries (#43283) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We are currently missing to upload binaries ### What changes are included in this PR? Update task name to correct one. ### Are these changes tested? No ### Are there any user-facing changes? 
No * GitHub Issue: #43282 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/release/binary-task.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index db008b1551309..b06b1c46b8504 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1895,7 +1895,7 @@ def define_docs_tasks :docs, "#{rc_dir}/docs/#{full_version}", "#{release_dir}/docs/#{full_version}", - "test-ubuntu-22.04-docs/**/*") + "test-debian-12-docs/**/*") end def define_nuget_tasks From 9f6c1b4f5ec269a6ed614411943f55ec455d6cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 17 Jul 2024 00:08:38 +0200 Subject: [PATCH 081/122] MINOR: [Docs][Release] Clarification on when to run the new Update Release Notes on GH Release (#43272) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Minor docs clarification on when to run the script. We could potentially search for the PR and validate it has been merged on the script to avoid issues but this change on the docs still applies. ### What changes are included in this PR? Just a minor clarification on the docs. ### Are these changes tested? No ### Are there any user-facing changes? No Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- docs/source/developers/release.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index fae48e4d8e0f0..0d9af1f543cac 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -400,12 +400,13 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - A committer must run the following script: + A committer must run the following script. This has to be done once the + Pull Request from the Update Website script has been merged: .. code-block:: Bash # dev/release/post-05-update-gh-release-notes.sh 17.0.0 - dev/release/post-05-update-gh-release-notes.sh apache-arrow-X.Y.Z + dev/release/post-05-update-gh-release-notes.sh .. dropdown:: Update Homebrew packages :animate: fade-in-slide-down From 25434bbe985442edfb811da976f5fa7cf4ee90f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:25:04 -0700 Subject: [PATCH 082/122] MINOR: [C#] Bump xunit from 2.8.1 to 2.9.0 in /csharp (#43259) Bumps xunit from 2.8.1 to 2.9.0. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit&package-manager=nuget&previous-version=2.8.1&new-version=2.9.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index e047bd61a0821..047cdb94b963e 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index d1974a7838e7a..dc95f9edf9f7f 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 61e6c06f451c5..e68a97670cc7e 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 3dbe34955e6d4..f05338313063c 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -17,7 +17,7 @@ - + all runtime; build; native; contentfiles; analyzers From de17643e3176d821e02873c776ee91f6e53d3314 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Wed, 17 Jul 2024 02:04:37 +0200 Subject: [PATCH 083/122] GH-43229: [Java] Update Maven project info (#43231) ### Rationale for this change Some Maven modules are missing project information like the website url, the mailing lists, and scm and issues url Other may have incorrect links because of the way Maven interpolates those values at build time ### What changes are included in this PR? Update/Fix Maven project information for all modules: * Add project url, mailing lists, scm and issueManagement information to bom and maven parent modules * Fix top-level parent by preventing Maven to rewrite project url, and scm connections/urls based on the module hierarchy * Change project.scm.tag to `main` and update version change script to also change the tag value to `apache-arrow-${version}` ### Are these changes tested? CI/CD only ### Are there any user-facing changes? 
No * GitHub Issue: #43229 Authored-by: Laurent Goujon Signed-off-by: David Li --- dev/release/01-prepare-test.rb | 11 ++++++++ dev/release/utils-prepare.sh | 9 ++++-- java/bom/pom.xml | 50 +++++++++++++++++++++++++++++++++- java/maven/pom.xml | 50 +++++++++++++++++++++++++++++++++- java/pom.xml | 25 ++++++++++++----- 5 files changed, 134 insertions(+), 11 deletions(-) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index fbd0b2996077c..fec99ef058c5b 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -323,6 +323,17 @@ def test_version_pre_tag "+#{new_line}", ] end + tag = "main" + target_lines = lines.grep(/#{Regexp.escape(tag)}/) + target_lines.each do |line| + new_line = line.gsub("main") do + "apache-arrow-#{@release_version}" + end + hunks << [ + "-#{line}", + "+#{new_line}", + ] + end expected_changes << {hunks: hunks, path: path} end diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index c255e728a335b..760a7f404a74d 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -83,8 +83,13 @@ update_versions() { popd pushd "${ARROW_DIR}/java" - mvn versions:set -DnewVersion=${version} -DprocessAllModules - find . -type f -name pom.xml.versionsBackup -delete + mvn versions:set -DnewVersion=${version} -DprocessAllModules -DgenerateBackupPoms=false + if [ "${type}" = "release" ]; then + # versions-maven-plugin:set-scm-tag does not update the whole reactor. Invoking separately + mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-java-root + mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-bom + mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-maven-plugins + fi git add "pom.xml" git add "**/pom.xml" popd diff --git a/java/bom/pom.xml b/java/bom/pom.xml index b0fe96d6d5ce0..e51906cd77e35 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -17,7 +17,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> - + 4.0.0 @@ -30,8 +30,51 @@ under the License. arrow-bom 18.0.0-SNAPSHOT pom + Arrow Bill of Materials Arrow Bill of Materials + https://arrow.apache.org/ + + + + Developer List + dev-subscribe@arrow.apache.org + dev-unsubscribe@arrow.apache.org + dev@arrow.apache.org + https://lists.apache.org/list.html?dev@arrow.apache.org + + + Commits List + commits-subscribe@arrow.apache.org + commits-unsubscribe@arrow.apache.org + commits@arrow.apache.org + https://lists.apache.org/list.html?commits@arrow.apache.org + + + Issues List + issues-subscribe@arrow.apache.org + issues-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?issues@arrow.apache.org + + + GitHub List + github-subscribe@arrow.apache.org + github-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?github@arrow.apache.org + + + + + scm:git:https://github.com/apache/arrow.git + scm:git:https://github.com/apache/arrow.git + main + https://github.com/apache/arrow/tree/${project.scm.tag} + + + + GitHub + https://github.com/apache/arrow/issues + @@ -169,6 +212,11 @@ under the License. spotless-maven-plugin 2.30.0 + + org.codehaus.mojo + versions-maven-plugin + 2.17.0 + diff --git a/java/maven/pom.xml b/java/maven/pom.xml index efa074deec5f5..d342b629358dd 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -17,7 +17,7 @@ KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. --> - + 4.0.0 @@ -75,6 +118,11 @@ under the License. cyclonedx-maven-plugin 2.8.0 + + org.codehaus.mojo + versions-maven-plugin + 2.17.0 + diff --git a/java/pom.xml b/java/pom.xml index f3f7432ba6aa1..4ce0c1981d295 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -17,7 +17,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> - + 4.0.0 @@ -41,20 +41,26 @@ under the License. dev-subscribe@arrow.apache.org dev-unsubscribe@arrow.apache.org dev@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-dev/ + https://lists.apache.org/list.html?dev@arrow.apache.org Commits List commits-subscribe@arrow.apache.org commits-unsubscribe@arrow.apache.org commits@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-commits/ + https://lists.apache.org/list.html?commits@arrow.apache.org Issues List issues-subscribe@arrow.apache.org issues-unsubscribe@arrow.apache.org - https://mail-archives.apache.org/mod_mbox/arrow-issues/ + https://lists.apache.org/list.html?issues@arrow.apache.org + + + GitHub List + github-subscribe@arrow.apache.org + github-unsubscribe@arrow.apache.org + https://lists.apache.org/list.html?github@arrow.apache.org @@ -73,11 +79,11 @@ under the License. compression - + scm:git:https://github.com/apache/arrow.git scm:git:https://github.com/apache/arrow.git - apache-arrow-2.0.0 - https://github.com/apache/arrow + main + https://github.com/apache/arrow/tree/${project.scm.tag} @@ -507,6 +513,11 @@ under the License. exec-maven-plugin 3.3.0 + + org.codehaus.mojo + versions-maven-plugin + 2.17.0 + pl.project13.maven git-commit-id-plugin From 5224f20b2a41e09da4dbdf76beee3203518be386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 17 Jul 2024 09:49:27 +0200 Subject: [PATCH 084/122] GH-43270: [Release] Fix input variables on post-01-tag.sh (#43271) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Scripts is failing to be executed ### What changes are included in this PR? Fixing the script ### Are these changes tested? Locally during the release ### Are there any user-facing changes? No * GitHub Issue: #43270 Lead-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/post-01-tag.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dev/release/post-01-tag.sh b/dev/release/post-01-tag.sh index df0f6756c0e7c..2fa48f1230219 100755 --- a/dev/release/post-01-tag.sh +++ b/dev/release/post-01-tag.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -eu set -o pipefail if [ "$#" -ne 2 ]; then @@ -25,8 +25,11 @@ if [ "$#" -ne 2 ]; then exit fi +version=$1 +rc=$2 + # Create the release tag and trigger the Publish Release workflow. 
-release_candidate_tag=apache-arrow-${version}-rc${num} release_tag=apache-arrow-${version} -git tag -a ${release_tag} ${release_candidate_tag}^{} -m "[Release] Apache Arrow Release ${version}" +release_candidate_tag=${release_tag}-rc${rc} +git tag -a ${release_tag} ${release_candidate_tag} -m "[Release] Apache Arrow Release ${version}" git push apache ${release_tag} From c66b3f149f92e1fae0b33cc63c6093db2deedd29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 17 Jul 2024 10:43:53 +0200 Subject: [PATCH 085/122] MINOR: [Release] Update .deb package names for 18.0.0 --- .../apache-arrow/debian/control.in | 104 +++++++++--------- ...700.install => libarrow-acero1800.install} | 0 ...install => libarrow-cuda-glib1800.install} | 0 ...1700.install => libarrow-cuda1800.install} | 0 ...tall => libarrow-dataset-glib1800.install} | 0 ...0.install => libarrow-dataset1800.install} | 0 ...stall => libarrow-flight-glib1800.install} | 0 ...l => libarrow-flight-sql-glib1800.install} | 0 ...nstall => libarrow-flight-sql1800.install} | 0 ...00.install => libarrow-flight1800.install} | 0 ...1700.install => libarrow-glib1800.install} | 0 ...arrow1700.install => libarrow1800.install} | 0 ...00.install => libgandiva-glib1800.install} | 0 ...iva1700.install => libgandiva1800.install} | 0 ...00.install => libparquet-glib1800.install} | 0 ...uet1700.install => libparquet1800.install} | 0 dev/tasks/tasks.yml | 60 +++++----- 17 files changed, 82 insertions(+), 82 deletions(-) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-acero1700.install => libarrow-acero1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda-glib1700.install => libarrow-cuda-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda1700.install => libarrow-cuda1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset-glib1700.install => libarrow-dataset-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset1700.install => libarrow-dataset1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-glib1700.install => libarrow-flight-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-sql-glib1700.install => libarrow-flight-sql-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-sql1700.install => libarrow-flight-sql1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight1700.install => libarrow-flight1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-glib1700.install => libarrow-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow1700.install => libarrow1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva-glib1700.install => libgandiva-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva1700.install => libgandiva1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet-glib1700.install => libparquet-glib1800.install} (100%) rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet1700.install => libparquet1800.install} (100%) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 38397e7ef6bd9..24e2839021aa8 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ 
b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -42,7 +42,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.8 Homepage: https://arrow.apache.org/ -Package: libarrow1700 +Package: libarrow1800 Section: libs Architecture: any Multi-Arch: same @@ -62,12 +62,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides tools. -Package: libarrow-cuda1700 +Package: libarrow-cuda1800 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -75,12 +75,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. -Package: libarrow-acero1700 +Package: libarrow-acero1800 Section: libs Architecture: any Multi-Arch: same @@ -88,12 +88,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Acero module. -Package: libarrow-dataset1700 +Package: libarrow-dataset1800 Section: libs Architecture: any Multi-Arch: same @@ -101,13 +101,13 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-acero1700 (= ${binary:Version}), - libparquet1700 (= ${binary:Version}) + libarrow-acero1800 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Dataset module. -Package: libarrow-flight1700 +Package: libarrow-flight1800 Section: libs Architecture: any Multi-Arch: same @@ -115,12 +115,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight RPC system. -Package: libarrow-flight-sql1700 +Package: libarrow-flight-sql1800 Section: libs Architecture: any Multi-Arch: same @@ -128,7 +128,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight1700 (= ${binary:Version}) + libarrow-flight1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight SQL system. @@ -139,7 +139,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow1700 (= ${binary:Version}), + libarrow1800 (= ${binary:Version}), @USE_SYSTEM_GRPC@ libabsl-dev, libbrotli-dev, libbz2-dev, @@ -167,7 +167,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda1700 (= ${binary:Version}) + libarrow-cuda1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -178,7 +178,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow-acero1700 (= ${binary:Version}), + libarrow-acero1800 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . 
@@ -191,7 +191,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-acero-dev (= ${binary:Version}), - libarrow-dataset1700 (= ${binary:Version}), + libarrow-dataset1800 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -204,7 +204,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-flight1700 (= ${binary:Version}), + libarrow-flight1800 (= ${binary:Version}), libc-ares-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev Description: Apache Arrow is a data processing library for analysis @@ -218,12 +218,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), - libarrow-flight-sql1700 (= ${binary:Version}) + libarrow-flight-sql1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight SQL system. -Package: libgandiva1700 +Package: libgandiva1800 Section: libs Architecture: any Multi-Arch: same @@ -231,7 +231,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -244,13 +244,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva1700 (= ${binary:Version}) + libgandiva1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libparquet1700 +Package: libparquet1800 Section: libs Architecture: any Multi-Arch: same @@ -270,7 +270,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libparquet1700 (= ${binary:Version}) + libparquet1800 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides tools. @@ -282,13 +282,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet1700 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}), libthrift-dev Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib1700 +Package: libarrow-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -296,7 +296,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow1700 (= ${binary:Version}) + libarrow1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files. @@ -320,7 +320,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-acero-dev (= ${binary:Version}), - libarrow-glib1700 (= ${binary:Version}), + libarrow-glib1800 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -338,7 +338,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. 
-Package: libarrow-cuda-glib1700 +Package: libarrow-cuda-glib1800 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -346,8 +346,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-cuda1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-cuda1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -372,13 +372,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib1700 (= ${binary:Version}), + libarrow-cuda-glib1800 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. -Package: libarrow-dataset-glib1700 +Package: libarrow-dataset-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -386,8 +386,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-dataset1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-dataset1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for dataset module. @@ -412,7 +412,7 @@ Depends: ${misc:Depends}, libarrow-dataset-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-dataset-glib1700 (= ${binary:Version}), + libarrow-dataset-glib1800 (= ${binary:Version}), gir1.2-arrow-dataset-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -429,7 +429,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for dataset module. -Package: libarrow-flight-glib1700 +Package: libarrow-flight-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -437,8 +437,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libarrow-flight1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libarrow-flight1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight. @@ -464,7 +464,7 @@ Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-flight-glib1700 (= ${binary:Version}), + libarrow-flight-glib1800 (= ${binary:Version}), gir1.2-arrow-flight-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -481,7 +481,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight. -Package: libarrow-flight-sql-glib1700 +Package: libarrow-flight-sql-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -489,8 +489,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight-glib1700 (= ${binary:Version}), - libarrow-flight-sql1700 (= ${binary:Version}) + libarrow-flight-glib1800 (= ${binary:Version}), + libarrow-flight-sql1800 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . 
This package provides GLib based library files for Apache Arrow Flight SQL. @@ -516,7 +516,7 @@ Depends: ${misc:Depends}, libarrow-flight-sql-dev (= ${binary:Version}), libarrow-flight-glib-dev (= ${binary:Version}), - libarrow-flight-sql-glib1700 (= ${binary:Version}), + libarrow-flight-sql-glib1800 (= ${binary:Version}), gir1.2-arrow-flight-sql-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -533,7 +533,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight SQL. -Package: libgandiva-glib1700 +Package: libgandiva-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -541,8 +541,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libgandiva1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libgandiva1800 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -569,7 +569,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib1700 (= ${binary:Version}), + libgandiva-glib1800 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. @@ -588,7 +588,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libparquet-glib1700 +Package: libparquet-glib1800 Section: libs Architecture: any Multi-Arch: same @@ -596,8 +596,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib1700 (= ${binary:Version}), - libparquet1700 (= ${binary:Version}) + libarrow-glib1800 (= ${binary:Version}), + libparquet1800 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. 
@@ -622,7 +622,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib1700 (= ${binary:Version}), + libparquet-glib1800 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-acero1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1700.install rename to 
dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1800.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet1700.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet1700.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet1800.install diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9c7d1ff3bbe12..2eb361047fc62 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -492,59 +492,59 @@ tasks: - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-acero-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-acero1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-acero1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-acero1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-acero1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - 
libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-glib1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-glib1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight1800_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow1800_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva1700_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva1800_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet1700_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet1800_{no_rc_version}-1_[a-z0-9]+.deb - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda-glib1700_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda1700-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - 
libarrow-cuda1700_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda-glib1800_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda1800-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda1800_{no_rc_version}-1_[a-z0-9]+.deb {% endif %} {% endfor %} {% endfor %} From a1376879ced6c0bc14dcc1e27c0c23c6ad1554a9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 17 Jul 2024 13:39:22 -0300 Subject: [PATCH 086/122] GH-43185: [C++] Suggest a cast when Concatenate fails due to offsets overflow (#43190) ## Rationale for this change When arrays using 32-bit offsets into data buffers are concatenated and the data buffers of the results grow beyond 2GB, `Concatenate` returns a bad `Status` with a very simple message: `"offset overflow while concatenating arrays"` The contract that `Concatenate` honors is very simple: arrays of input type T lead to output of the same type T, so we can't, for instance, return a `LARGE_STRING` [1] array when the input is `STRING`. But we can **suggest a cast** to the caller in case an overflow error is detected, either programmatically (by taking an output parameter) or by giving a better error message to users. [1] `LARGE_STRING` can use 64-bit offsets. ### What changes are included in this PR? - Suggest casts when concatenation of the values of an FSL fails due to overflow - Suggest casts when concatenation of a [LARGE_]LIST_VIEW array fails due to overflow - Suggest casts when concatenation of a [LARGE_]LIST array fails due to overflow - Suggest a cast to LARGE_(BINARY|STRING) when offsets overflow ### Are these changes tested? Yes. * GitHub Issue: #43185 Lead-authored-by: Felipe Oliveira Carvalho Co-authored-by: Benjamin Kietzman Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/concatenate.cc | 262 ++++++++++++++++++------ cpp/src/arrow/array/concatenate.h | 16 ++ cpp/src/arrow/array/concatenate_test.cc | 103 +++++++++- 3 files changed, 316 insertions(+), 65 deletions(-) diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index 87e55246c78fe..b4638dd6593d8 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -75,6 +75,31 @@ struct Bitmap { bool AllSet() const { return data == nullptr; } }; +enum class OffsetBufferOpOutcome { + kOk, + kOffsetOverflow, +}; + +Status OffsetOverflowStatus() { + return Status::Invalid("offset overflow while concatenating arrays"); +} + +#define RETURN_IF_NOT_OK_OUTCOME(outcome) \ + switch (outcome) { \ + case OffsetBufferOpOutcome::kOk: \ + break; \ + case OffsetBufferOpOutcome::kOffsetOverflow: \ + return OffsetOverflowStatus(); \ + } + +struct ErrorHints { + /// \brief Suggested cast to avoid overflow during concatenation. + /// + /// If the concatenation of offsets overflows, this field might be set to + /// a type that uses larger offsets (e.g. large_utf8, large_list). std::shared_ptr suggested_cast; +}; + // Allocate a buffer and concatenate bitmaps into it. Status ConcatenateBitmaps(const std::vector& bitmaps, MemoryPool* pool, std::shared_ptr* out) { @@ -112,15 +137,16 @@ int64_t SumBufferSizesInBytes(const BufferVector& buffers) { // Write offsets in src into dst, adjusting them such that first_offset // will be the first offset written.
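// As a quick illustration, using made-up numbers: if src holds the int32
// offsets [0, 2, 5] and first_offset is 7, the displacement is 7 - 0 = 7,
// so dst receives [7, 9, 12] and values_range is set to {offset: 0, length: 5}.
// The overflow outcome is returned when first_offset plus values_range->length
// would exceed std::numeric_limits<Offset>::max().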
template -Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, - Range* values_range); +Result PutOffsets(const Buffer& src, Offset first_offset, + Offset* dst, Range* values_range); // Concatenate buffers holding offsets into a single buffer of offsets, // also computing the ranges of values spanned by each buffer of offsets. template -Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool, - std::shared_ptr* out, - std::vector* values_ranges) { +Result ConcatenateOffsets(const BufferVector& buffers, + MemoryPool* pool, + std::shared_ptr* out, + std::vector* values_ranges) { values_ranges->resize(buffers.size()); // allocate output buffer @@ -133,26 +159,30 @@ Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool, for (size_t i = 0; i < buffers.size(); ++i) { // the first offset from buffers[i] will be adjusted to values_length // (the cumulative length of values spanned by offsets in previous buffers) - RETURN_NOT_OK(PutOffsets(*buffers[i], values_length, - out_data + elements_length, &(*values_ranges)[i])); + ARROW_ASSIGN_OR_RAISE(auto outcome, PutOffsets(*buffers[i], values_length, + out_data + elements_length, + &(*values_ranges)[i])); + if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) { + return outcome; + } elements_length += buffers[i]->size() / sizeof(Offset); values_length += static_cast((*values_ranges)[i].length); } // the final element in out_data is the length of all values spanned by the offsets out_data[out_size_in_bytes / sizeof(Offset)] = values_length; - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, - Range* values_range) { +Result PutOffsets(const Buffer& src, Offset first_offset, + Offset* dst, Range* values_range) { if (src.size() == 0) { // It's allowed to have an empty offsets buffer for a 0-length array // (see Array::Validate) values_range->offset = 0; values_range->length = 0; - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } // Get the range of offsets to transfer from src @@ -162,8 +192,9 @@ Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, // Compute the range of values which is spanned by this range of offsets values_range->offset = src_begin[0]; values_range->length = *src_end - values_range->offset; - if (first_offset > std::numeric_limits::max() - values_range->length) { - return Status::Invalid("offset overflow while concatenating arrays"); + if (ARROW_PREDICT_FALSE(first_offset > + std::numeric_limits::max() - values_range->length)) { + return OffsetBufferOpOutcome::kOffsetOverflow; } // Write offsets into dst, ensuring that the first offset written is @@ -175,12 +206,14 @@ Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, std::transform(src_begin, src_end, dst, [displacement](Offset offset) { return SafeSignedAdd(offset, displacement); }); - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src, - offset_type displacement, offset_type* dst); +Result PutListViewOffsets(const ArrayData& input, + offset_type* sizes, const Buffer& src, + offset_type displacement, + offset_type* dst); // Concatenate buffers holding list-view offsets into a single buffer of offsets // @@ -198,10 +231,10 @@ Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buff // \param[in] in The child arrays // \param[in,out] sizes The concatenated 
sizes buffer template -Status ConcatenateListViewOffsets(const ArrayDataVector& in, offset_type* sizes, - const BufferVector& offset_buffers, - const std::vector& value_ranges, - MemoryPool* pool, std::shared_ptr* out) { +Result ConcatenateListViewOffsets( + const ArrayDataVector& in, offset_type* sizes, const BufferVector& offset_buffers, + const std::vector& value_ranges, MemoryPool* pool, + std::shared_ptr* out) { DCHECK_EQ(offset_buffers.size(), value_ranges.size()); // Allocate resulting offsets buffer and initialize it with zeros @@ -216,26 +249,32 @@ Status ConcatenateListViewOffsets(const ArrayDataVector& in, offset_type* sizes, for (size_t i = 0; i < offset_buffers.size(); ++i) { const auto displacement = static_cast(num_child_values - value_ranges[i].offset); - RETURN_NOT_OK(PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length, - /*src=*/*offset_buffers[i], displacement, - /*dst=*/out_offsets + elements_length)); + ARROW_ASSIGN_OR_RAISE(auto outcome, + PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length, + /*src=*/*offset_buffers[i], displacement, + /*dst=*/out_offsets + elements_length)); + if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) { + return outcome; + } elements_length += offset_buffers[i]->size() / sizeof(offset_type); num_child_values += value_ranges[i].length; if (num_child_values > std::numeric_limits::max()) { - return Status::Invalid("offset overflow while concatenating arrays"); + return OffsetBufferOpOutcome::kOffsetOverflow; } } DCHECK_EQ(elements_length, static_cast(out_size_in_bytes / sizeof(offset_type))); - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src, - offset_type displacement, offset_type* dst) { +Result PutListViewOffsets(const ArrayData& input, + offset_type* sizes, const Buffer& src, + offset_type displacement, + offset_type* dst) { if (src.size() == 0) { - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } const auto& validity_buffer = input.buffers[0]; if (validity_buffer) { @@ -291,7 +330,7 @@ Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buff } } } - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } class ConcatenateImpl { @@ -316,11 +355,17 @@ class ConcatenateImpl { } } - Status Concatenate(std::shared_ptr* out) && { + Status Concatenate(std::shared_ptr* out, ErrorHints* out_hints) && { if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) { RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0])); } - RETURN_NOT_OK(VisitTypeInline(*out_->type, this)); + auto status = VisitTypeInline(*out_->type, this); + if (!status.ok()) { + if (out_hints) { + out_hints->suggested_cast = std::move(suggested_cast_); + } + return status; + } *out = std::move(out_); return Status::OK(); } @@ -337,11 +382,29 @@ class ConcatenateImpl { return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]); } - Status Visit(const BinaryType&) { + Status Visit(const BinaryType& input_type) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + switch (outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + switch (input_type.id()) { 
+ case Type::BINARY: + suggested_cast_ = large_binary(); + break; + case Type::STRING: + suggested_cast_ = large_utf8(); + break; + default: + DCHECK(false) << "unexpected type id from BinaryType: " << input_type; + break; + } + return OffsetOverflowStatus(); + } ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges)); return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]); } @@ -349,8 +412,10 @@ class ConcatenateImpl { Status Visit(const LargeBinaryType&) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + RETURN_IF_NOT_OK_OUTCOME(outcome); ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges)); return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]); } @@ -394,22 +459,44 @@ class ConcatenateImpl { return Status::OK(); } - Status Visit(const ListType&) { + Status Visit(const ListType& input_type) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE(auto offsets_outcome, + ConcatenateOffsets(index_buffers, pool_, + &out_->buffers[1], &value_ranges)); + switch (offsets_outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + suggested_cast_ = large_list(input_type.value_type()); + return OffsetOverflowStatus(); + } ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges)); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + ErrorHints child_error_hints; + auto status = ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!status.ok() && child_error_hints.suggested_cast) { + suggested_cast_ = list(std::move(child_error_hints.suggested_cast)); + } + return status; } Status Visit(const LargeListType&) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + RETURN_IF_NOT_OK_OUTCOME(outcome); ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges)); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + ErrorHints child_error_hints; + auto status = ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!status.ok() && child_error_hints.suggested_cast) { + suggested_cast_ = large_list(std::move(child_error_hints.suggested_cast)); + } + return status; } template @@ -430,8 +517,17 @@ class ConcatenateImpl { } // Concatenate the values + ErrorHints child_error_hints; ARROW_ASSIGN_OR_RAISE(ArrayDataVector value_data, ChildData(0, value_ranges)); - RETURN_NOT_OK(ConcatenateImpl(value_data, pool_).Concatenate(&out_->child_data[0])); + auto values_status = ConcatenateImpl(value_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!values_status.ok()) { + if (child_error_hints.suggested_cast) { + suggested_cast_ = std::make_shared>( + std::move(child_error_hints.suggested_cast)); + } + return values_status; + } out_->child_data[0]->type 
= type.value_type(); // Concatenate the sizes first @@ -440,22 +536,39 @@ class ConcatenateImpl { // Concatenate the offsets ARROW_ASSIGN_OR_RAISE(auto offset_buffers, Buffers(1, sizeof(offset_type))); - RETURN_NOT_OK(ConcatenateListViewOffsets( - in_, /*sizes=*/out_->buffers[2]->mutable_data_as(), offset_buffers, - value_ranges, pool_, &out_->buffers[1])); - + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateListViewOffsets( + in_, /*sizes=*/out_->buffers[2]->mutable_data_as(), + offset_buffers, value_ranges, pool_, &out_->buffers[1])); + switch (outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + if constexpr (T::type_id == Type::LIST_VIEW) { + suggested_cast_ = large_list_view(type.value_type()); + } + return OffsetOverflowStatus(); + } return Status::OK(); } - Status Visit(const FixedSizeListType& fixed_size_list) { - ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size())); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + Status Visit(const FixedSizeListType& fsl_type) { + ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fsl_type.list_size())); + ErrorHints hints; + auto status = + ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0], &hints); + if (!status.ok() && hints.suggested_cast) { + suggested_cast_ = + fixed_size_list(std::move(hints.suggested_cast), fsl_type.list_size()); + } + return status; } Status Visit(const StructType& s) { for (int i = 0; i < s.num_fields(); ++i) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } return Status::OK(); } @@ -570,8 +683,8 @@ class ConcatenateImpl { case UnionMode::SPARSE: { for (int i = 0; i < u.num_fields(); i++) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -581,8 +694,8 @@ class ConcatenateImpl { for (size_t j = 0; j < in_.size(); j++) { child_data[j] = in_[j]->child_data[i]; } - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -666,7 +779,8 @@ class ConcatenateImpl { storage_data[i]->type = e.storage_type(); } std::shared_ptr out_storage; - RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_).Concatenate(&out_storage)); + RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_) + .Concatenate(&out_storage, /*hints=*/nullptr)); out_storage->type = in_[0]->type; out_ = std::move(out_storage); return Status::OK(); @@ -797,11 +911,18 @@ class ConcatenateImpl { const ArrayDataVector& in_; MemoryPool* pool_; std::shared_ptr out_; + std::shared_ptr suggested_cast_; }; } // namespace -Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { +namespace internal { + +Result> Concatenate( + const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast) { + DCHECK(out_suggested_cast); + *out_suggested_cast = nullptr; if (arrays.size() == 0) { return Status::Invalid("Must pass at least one array"); } @@ -818,8 +939,31 @@ Result> Concatenate(const ArrayVector& arrays, MemoryPool } std::shared_ptr out_data; - 
RETURN_NOT_OK(ConcatenateImpl(data, pool).Concatenate(&out_data)); + ErrorHints hints; + auto status = ConcatenateImpl(data, pool).Concatenate(&out_data, &hints); + if (!status.ok()) { + if (hints.suggested_cast) { + DCHECK(status.IsInvalid()); + *out_suggested_cast = std::move(hints.suggested_cast); + } + return status; + } return MakeArray(std::move(out_data)); } +} // namespace internal + +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { + std::shared_ptr suggested_cast; + auto result = internal::Concatenate(arrays, pool, &suggested_cast); + if (!result.ok() && suggested_cast && arrays.size() > 0) { + DCHECK(result.status().IsInvalid()); + return Status::Invalid(result.status().message(), ", consider casting input from `", + *arrays[0]->type(), "` to `", *suggested_cast, "` first."); + } + return result; +} + +#undef RETURN_IF_NOT_OK_OUTCOME + } // namespace arrow diff --git a/cpp/src/arrow/array/concatenate.h b/cpp/src/arrow/array/concatenate.h index e7597aad812c4..aada5624d63a3 100644 --- a/cpp/src/arrow/array/concatenate.h +++ b/cpp/src/arrow/array/concatenate.h @@ -24,6 +24,22 @@ #include "arrow/util/visibility.h" namespace arrow { +namespace internal { + +/// \brief Concatenate arrays +/// +/// \param[in] arrays a vector of arrays to be concatenated +/// \param[in] pool memory to store the result will be allocated from this memory pool +/// \param[out] out_suggested_cast if a non-OK Result is returned, the function might set +/// out_suggested_cast to a cast suggestion that would allow concatenating the arrays +/// without overflow of offsets (e.g. string to large_string) +/// +/// \return the concatenated array +ARROW_EXPORT +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast); + +} // namespace internal /// \brief Concatenate arrays /// diff --git a/cpp/src/arrow/array/concatenate_test.cc b/cpp/src/arrow/array/concatenate_test.cc index af595e897f9ee..aea5311575299 100644 --- a/cpp/src/arrow/array/concatenate_test.cc +++ b/cpp/src/arrow/array/concatenate_test.cc @@ -29,6 +29,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -42,6 +43,7 @@ #include "arrow/testing/util.h" #include "arrow/type.h" #include "arrow/util/list_util.h" +#include "arrow/util/unreachable.h" namespace arrow { @@ -661,14 +663,103 @@ TEST_F(ConcatenateTest, ExtensionType) { }); } +std::shared_ptr LargeVersionOfType(const std::shared_ptr& type) { + switch (type->id()) { + case Type::BINARY: + return large_binary(); + case Type::STRING: + return large_utf8(); + case Type::LIST: + return large_list(static_cast(*type).value_type()); + case Type::LIST_VIEW: + return large_list_view(static_cast(*type).value_type()); + case Type::LARGE_BINARY: + case Type::LARGE_STRING: + case Type::LARGE_LIST: + case Type::LARGE_LIST_VIEW: + return type; + default: + Unreachable(); + } +} + +std::shared_ptr fixed_size_list_of_1(std::shared_ptr type) { + return fixed_size_list(std::move(type), 1); +} + TEST_F(ConcatenateTest, OffsetOverflow) { - auto fake_long = ArrayFromJSON(utf8(), "[\"\"]"); - fake_long->data()->GetMutableValues(1)[1] = + using TypeFactory = std::shared_ptr (*)(std::shared_ptr); + static const std::vector kNestedTypeFactories = { + list, large_list, list_view, large_list_view, fixed_size_list_of_1, + }; + + auto* pool = default_memory_pool(); + std::shared_ptr suggested_cast; + for (auto& ty : {binary(), utf8()}) { + auto large_ty = LargeVersionOfType(ty); + + auto fake_long = ArrayFromJSON(ty, "[\"\"]"); + 
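// The assignment below overwrites the end offset of the single empty value
// (buffer 1 is the int32 offsets buffer) with the maximum int32 value, so this
// one-element array claims to span ~2 GiB of value data that was never
// allocated; concatenating two such arrays must then overflow 32-bit offsets.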
fake_long->data()->GetMutableValues(1)[1] = + std::numeric_limits::max(); + // XXX: since the data fake_long claims to own isn't there, this would + // segfault if Concatenate didn't detect overflow and raise an error. + auto concatenate_status = Concatenate({fake_long, fake_long}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + ::testing::StrEq("Invalid: offset overflow while concatenating arrays, " + "consider casting input from `" + + ty->ToString() + "` to `large_" + ty->ToString() + "` first."), + concatenate_status); + + concatenate_status = + internal::Concatenate({fake_long, fake_long}, pool, &suggested_cast); + // Message doesn't contain the suggested cast type when the caller + // asks for it by passing the output parameter. + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::StrEq("Invalid: offset overflow while concatenating arrays"), + concatenate_status); + ASSERT_TRUE(large_ty->Equals(*suggested_cast)); + + // Check that the suggested cast is correct when concatenation + // fails due to the child array being too large. + for (auto factory : kNestedTypeFactories) { + auto nested_ty = factory(ty); + auto expected_suggestion = factory(large_ty); + auto fake_long_list = ArrayFromJSON(nested_ty, "[[\"\"]]"); + fake_long_list->data()->child_data[0] = fake_long->data(); + + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(*expected_suggestion)); + } + } + + auto list_ty = list(utf8()); + auto fake_long_list = ArrayFromJSON(list_ty, "[[\"Hello\"]]"); + fake_long_list->data()->GetMutableValues(1)[1] = std::numeric_limits::max(); - std::shared_ptr concatenated; - // XX since the data fake_long claims to own isn't there, this will segfault if - // Concatenate doesn't detect overflow and raise an error. - ASSERT_RAISES(Invalid, Concatenate({fake_long, fake_long}).status()); + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_ty))); + + auto list_view_ty = list_view(null()); + auto fake_long_list_view = ArrayFromJSON(list_view_ty, "[[], []]"); + { + constexpr int kInt32Max = std::numeric_limits::max(); + auto* values = fake_long_list_view->data()->child_data[0].get(); + auto* mutable_offsets = fake_long_list_view->data()->GetMutableValues(1); + auto* mutable_sizes = fake_long_list_view->data()->GetMutableValues(2); + values->length = 2 * static_cast(kInt32Max); + mutable_offsets[1] = kInt32Max; + mutable_offsets[0] = kInt32Max; + mutable_sizes[0] = kInt32Max; + } + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list_view, fake_long_list_view}, + pool, &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_view_ty))); } TEST_F(ConcatenateTest, DictionaryConcatenateWithEmptyUint16) { From 4161898d5cd7d05ad0fd9f23e1e9df9439460c98 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Wed, 17 Jul 2024 22:27:52 +0200 Subject: [PATCH 087/122] GH-38051: [Java] Remove Java 8 support (#43139) ### What changes are included in this PR?
* Remove support for Java 8 in GitHub Actions and other CI/CD tasks, making Java 11 the default version * Make Java 11 the minimum version required to build and run Arrow by changing the Maven project configuration: - Change minimum Java version and source/target/release compiler properties to 11 - Remove `maven` modules - Remove jdk11+ profiles and integrate their content into the main section - Let maven-compiler-plugin process `module-info.java` files and address several declaration issues - Exclude non-modularized modules from javadoc aggregate tasks - Exclude module-info.class files from shaded jars as it is not representative of the whole content and may actually come directly from a 3rd party dependency. * Update documentation ### Are these changes tested? Through CI/CD. ### Are there any user-facing changes? Yes. Java 11 is now required to run any Arrow code. **This PR includes breaking changes to public APIs.** * GitHub Issue: #38051 Authored-by: Laurent Goujon Signed-off-by: Dane Pitkin --- .env | 2 +- .github/workflows/java.yml | 2 +- ci/docker/conda-integration.dockerfile | 2 +- ci/docker/conda-python-hdfs.dockerfile | 2 +- ci/docker/conda-python-spark.dockerfile | 2 +- ci/docker/java-jni-manylinux-201x.dockerfile | 2 +- ci/docker/linux-apt-docs.dockerfile | 2 +- dev/conbench_envs/README.md | 6 +- dev/release/setup-rhel-rebuilds.sh | 4 +- dev/release/verify-release-candidate.sh | 2 +- dev/tasks/tasks.yml | 13 +- docker-compose.yml | 4 +- docs/source/developers/java/building.rst | 2 +- docs/source/java/flight_sql_jdbc_driver.rst | 2 +- docs/source/java/install.rst | 4 +- java/README.md | 2 +- java/adapter/jdbc/pom.xml | 45 +- .../jdbc/src/main/java/module-info.java | 1 + java/bom/pom.xml | 6 +- java/dataset/pom.xml | 22 +- java/flight/flight-core/pom.xml | 25 +- .../src/main/java/module-info.java | 3 + java/flight/flight-integration-tests/pom.xml | 24 +- java/flight/flight-sql-jdbc-driver/pom.xml | 1 + java/flight/flight-sql/pom.xml | 20 - .../flight-sql/src/main/java/module-info.java | 1 + java/flight/pom.xml | 13 - .../module-info-compiler-maven-plugin/pom.xml | 124 ------ .../plugins/BaseModuleInfoCompilerPlugin.java | 88 ---- .../plugins/ModuleInfoCompilerPlugin.java | 56 --- .../plugins/ModuleInfoTestCompilerPlugin.java | 49 -- java/maven/pom.xml | 419 ------------------ java/memory/memory-core/pom.xml | 81 ++-- .../src/main/java/module-info.java | 3 + java/pom.xml | 109 +---- java/tools/pom.xml | 24 +- java/tools/src/main/java/module-info.java | 2 +- 37 files changed, 136 insertions(+), 1033 deletions(-) delete mode 100644 java/maven/module-info-compiler-maven-plugin/pom.xml delete mode 100644 java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java delete mode 100644 java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java delete mode 100644 java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java delete mode 100644 java/maven/pom.xml diff --git a/.env b/.env index be35921f94c3a..1358aafe824a6 100644 --- a/.env +++ b/.env @@ -61,7 +61,7 @@ GCC_VERSION="" GO=1.21.8 STATICCHECK=v0.4.7 HDFS=3.2.1 -JDK=8 +JDK=11 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete.
LLVM=14 diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 8eb2682dc077d..d4211c2c81cb5 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [8, 11, 17, 21, 22] + jdk: [11, 17, 21, 22] maven: [3.9.6] image: [java] env: diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 78d2503b23df7..c602490d6b729 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -23,7 +23,7 @@ ARG arch=amd64 ARG maven=3.8.7 ARG node=16 ARG yarn=1.22 -ARG jdk=8 +ARG jdk=11 ARG go=1.21.8 # Install Archery and integration dependencies diff --git a/ci/docker/conda-python-hdfs.dockerfile b/ci/docker/conda-python-hdfs.dockerfile index fa4fa0d1fb772..4e5e1a402e282 100644 --- a/ci/docker/conda-python-hdfs.dockerfile +++ b/ci/docker/conda-python-hdfs.dockerfile @@ -20,7 +20,7 @@ ARG arch=amd64 ARG python=3.8 FROM ${repo}:${arch}-conda-python-${python} -ARG jdk=8 +ARG jdk=11 ARG maven=3.8.7 RUN mamba install -q -y \ maven=${maven} \ diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile index 866f6f37f8bd9..d95fe58b529f6 100644 --- a/ci/docker/conda-python-spark.dockerfile +++ b/ci/docker/conda-python-spark.dockerfile @@ -20,7 +20,7 @@ ARG arch=amd64 ARG python=3.8 FROM ${repo}:${arch}-conda-python-${python} -ARG jdk=8 +ARG jdk=11 ARG maven=3.8.7 ARG numpy=latest diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index 8b73c73c1d240..479f4aa598b18 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -33,7 +33,7 @@ RUN vcpkg install \ --x-feature=s3 # Install Java -ARG java=1.8.0 +ARG java=11 ARG maven=3.9.3 RUN yum install -y java-$java-openjdk-devel && \ yum clean all && \ diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 1c916840e071b..0804f3543c283 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -19,7 +19,7 @@ ARG base FROM ${base} ARG r=4.4 -ARG jdk=8 +ARG jdk=11 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium diff --git a/dev/conbench_envs/README.md b/dev/conbench_envs/README.md index 509dc5c0c9537..7fab503974805 100644 --- a/dev/conbench_envs/README.md +++ b/dev/conbench_envs/README.md @@ -99,16 +99,16 @@ Here are steps how `@ursabot` benchmark builds use `benchmarks.env` and `hooks.s ### 2. Install Arrow dependencies for Java sudo su - apt-get install openjdk-8-jdk + apt-get install openjdk-11-jdk apt-get install maven Verify that you have at least these versions of `java`, `javac` and `maven`: # java -version - openjdk version "1.8.0_292" + openjdk version "11.0.22" 2024-01-16 .. # javac -version - javac 1.8.0_292 + javac 11.0.22 ... 
# mvn -version Apache Maven 3.6.3 diff --git a/dev/release/setup-rhel-rebuilds.sh b/dev/release/setup-rhel-rebuilds.sh index dc190d2d2426e..e8861a19f35b7 100755 --- a/dev/release/setup-rhel-rebuilds.sh +++ b/dev/release/setup-rhel-rebuilds.sh @@ -35,7 +35,7 @@ dnf -y install \ cmake \ git \ gobject-introspection-devel \ - java-1.8.0-openjdk-devel \ + java-11-openjdk-devel \ libcurl-devel \ llvm-devel \ llvm-toolset \ @@ -55,3 +55,5 @@ npm install -g yarn python3 -m ensurepip --upgrade alternatives --set python /usr/bin/python3 +alternatives --set java java-11-openjdk.$(uname -i) +alternatives --set javac java-11-openjdk.$(uname -i) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 2f4b203f217af..6a36109dc2fc1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -21,7 +21,7 @@ # Requirements # - Ruby >= 2.3 # - Maven >= 3.8.7 -# - JDK >=8 +# - JDK >= 11 # - gcc >= 4.8 # - Node.js >= 18 # - Go >= 1.21 diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 2eb361047fc62..32534e80528af 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -745,9 +745,6 @@ tasks: - arrow-jdbc-{no_rc_snapshot_version}-tests.jar - arrow-jdbc-{no_rc_snapshot_version}.jar - arrow-jdbc-{no_rc_snapshot_version}.pom - - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json - - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-maven-plugins-{no_rc_snapshot_version}.pom - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml - arrow-memory-core-{no_rc_snapshot_version}-javadoc.jar @@ -843,12 +840,6 @@ tasks: - flight-sql-jdbc-driver-{no_rc_snapshot_version}-tests.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.jar - flight-sql-jdbc-driver-{no_rc_snapshot_version}.pom - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.json - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom ############################## NuGet packages ############################### @@ -1549,9 +1540,7 @@ tasks: image: conda-python-hdfs {% endfor %} -{% for python_version, spark_version, test_pyarrow_only, numpy_version, jdk_version in [("3.8", "v3.5.0", "false", "latest", "8"), - ("3.10", "v3.5.0", "false", "1.23", "8"), - ("3.11", "master", "false", "latest", "17")] %} +{% for python_version, spark_version, test_pyarrow_only, numpy_version, jdk_version in [("3.11", "master", "false", "latest", "17")] %} test-conda-python-{{ python_version }}-spark-{{ spark_version }}: ci: github template: docker-tests/github.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index fa248d59037d3..cf22324f7cfb4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1202,7 +1202,7 @@ services: build: args: base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG} - java: 1.8.0 + java: 11 context: . 
dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile cache_from: @@ -1747,7 +1747,7 @@ services: # docker-compose run java # Parameters: # MAVEN: 3.9.5 - # JDK: 8, 11, 17, 21 + # JDK: 11, 17, 21 image: ${ARCH}/maven:${MAVEN}-eclipse-temurin-${JDK} shm_size: *shm-size volumes: &java-volumes diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index 82053e901186c..5ee80211584a0 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -32,7 +32,7 @@ Arrow Java uses the `Maven `_ build system. Building requires: -* JDK 8+ +* JDK 11+ * Maven 3+ .. note:: diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index f95c2ac755d97..0224cc3235652 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -28,7 +28,7 @@ This driver can be used with any database that implements Flight SQL. Installation and Requirements ============================= -The driver is compatible with JDK 8+. On JDK 9+, the following JVM +The driver is compatible with JDK 11+. Note that the following JVM parameter is required: .. code-block:: shell diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index dc6a55c87fcd6..3bdd416b8e792 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -30,9 +30,9 @@ Java Compatibility ================== Java modules are compatible with JDK 8 and above. Currently, JDK versions -8, 11, 17, and 21 are tested in CI. The latest JDK is also tested in CI. +11, 17, and 21 are tested in CI. The latest JDK is also tested in CI. -When using Java 9 or later, some JDK internals must be exposed by +Note that some JDK internals must be exposed by adding ``--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED`` to the ``java`` command: .. code-block:: shell diff --git a/java/README.md b/java/README.md index 25e35c10973e9..9f1b1c63c8f41 100644 --- a/java/README.md +++ b/java/README.md @@ -85,7 +85,7 @@ variable are set, the system property takes precedence. ## Java Properties - * For Java 9 or later, should set `-Dio.netty.tryReflectionSetAccessible=true`. + * `-Dio.netty.tryReflectionSetAccessible=true` should be set. This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by Netty. * To support duplicate fields in a `StructVector` enable `-Darrow.struct.conflict.policy=CONFLICT_APPEND`. Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 875334af4526d..e86dfcb0b0a52 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -82,7 +82,6 @@ under the License. com.fasterxml.jackson.core jackson-annotations - test @@ -93,24 +92,30 @@ under the License. 
- - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.adapter.jdbc=com.fasterxml.jackson.dataformat.yaml --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -Duser.timezone=UTC + + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:jackson-annotations + - - - - - - + + + + + org.apache.maven.plugins + maven-surefire-plugin + + --add-reads=org.apache.arrow.adapter.jdbc=com.fasterxml.jackson.dataformat.yaml --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -Duser.timezone=UTC + + + + diff --git a/java/adapter/jdbc/src/main/java/module-info.java b/java/adapter/jdbc/src/main/java/module-info.java index 5b59ce768472a..04977222c1530 100644 --- a/java/adapter/jdbc/src/main/java/module-info.java +++ b/java/adapter/jdbc/src/main/java/module-info.java @@ -20,6 +20,7 @@ exports org.apache.arrow.adapter.jdbc; exports org.apache.arrow.adapter.jdbc.binder; + requires com.fasterxml.jackson.annotation; requires com.fasterxml.jackson.databind; requires java.sql; requires jdk.unsupported; diff --git a/java/bom/pom.xml b/java/bom/pom.xml index e51906cd77e35..ad6532b1192bb 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -79,8 +79,10 @@ under the License. - 1.8 - 1.8 + 11 + 11 + 11 + 11 3.12.0 3.2.5 0.16.1 diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index c5c7468ccee84..74071a6c305ad 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -165,6 +165,7 @@ under the License. test + @@ -179,6 +180,7 @@ under the License. maven-surefire-plugin + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED false ${project.basedir}/../../testing/data @@ -202,24 +204,4 @@ under the License. - - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED - - - - - - diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index c00bba5e6c763..d4083383a2f44 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -144,11 +144,13 @@ under the License. test + maven-surefire-plugin + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED false ${project.basedir}/../../../testing/data @@ -198,27 +200,4 @@ under the License. 
- - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - ${project.basedir}/../../../testing/data - - - - - - - diff --git a/java/flight/flight-core/src/main/java/module-info.java b/java/flight/flight-core/src/main/java/module-info.java index f6bf5b73b0972..ff0d7427b59cc 100644 --- a/java/flight/flight-core/src/main/java/module-info.java +++ b/java/flight/flight-core/src/main/java/module-info.java @@ -31,12 +31,15 @@ requires io.grpc.netty; requires io.grpc.protobuf; requires io.grpc.stub; + requires io.netty.buffer; requires io.netty.common; requires io.netty.handler; requires io.netty.transport; + requires jsr305; requires org.apache.arrow.format; requires org.apache.arrow.memory.core; requires org.apache.arrow.vector; requires protobuf.java; + requires protobuf.java.util; requires org.slf4j; } diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 97bce0c6ed5e3..a154062ba814d 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -69,19 +69,29 @@ under the License. - maven-assembly-plugin - - - jar-with-dependencies - - + maven-shade-plugin make-assembly - single + shade package + + false + true + jar-with-dependencies + + + + **/module-info.class + + + + + + + diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 524b9cd4f8aae..3dfe3bcd33f50 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -161,6 +161,7 @@ under the License. META-INF/native/libio_grpc_netty* META-INF/native/io_grpc_netty_shaded* **/*.proto + **/module-info.class diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 9c8c5df07fb78..fc6c789ee99d6 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -120,24 +120,4 @@ under the License. true - - - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.flight.sql=org.slf4j --add-reads=org.apache.arrow.flight.core=ALL-UNNAMED --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - - diff --git a/java/flight/flight-sql/src/main/java/module-info.java b/java/flight/flight-sql/src/main/java/module-info.java index 5514d5b870afd..cb3835117daf6 100644 --- a/java/flight/flight-sql/src/main/java/module-info.java +++ b/java/flight/flight-sql/src/main/java/module-info.java @@ -25,5 +25,6 @@ requires org.apache.arrow.flight.core; requires org.apache.arrow.memory.core; requires org.apache.arrow.vector; + requires org.apache.commons.cli; requires protobuf.java; } diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 851f44d7bf19e..55511eba82b3a 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -37,17 +37,4 @@ under the License. 
flight-sql-jdbc-driver flight-integration-tests - - - - pin-mockito-jdk8 - - 1.8 - - - 4.11.0 - 5.2.0 - - - diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml deleted file mode 100644 index 77184d35b5ac7..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow.maven.plugins - arrow-maven-plugins - 18.0.0-SNAPSHOT - - module-info-compiler-maven-plugin - maven-plugin - - Module Info Compiler Maven Plugin - - https://arrow.apache.org - - - ${maven.version} - - - - 3.8.7 - - - - - org.glavo - module-info-compiler - 2.0 - - - org.apache.maven - maven-plugin-api - ${maven.version} - provided - - - org.apache.maven - maven-core - ${maven.version} - provided - - - org.apache.maven - maven-artifact - ${maven.version} - provided - - - org.apache.maven - maven-model - ${maven.version} - provided - - - org.apache.maven.plugin-tools - maven-plugin-annotations - ${maven.plugin.tools.version} - provided - - - - - - - - com.gradle - develocity-maven-extension - - - - - - arrow-git.properties - - - - - - - - - - - org.apache.maven.plugins - maven-plugin-plugin - - true - - - - mojo-descriptor - - descriptor - - - - help-goal - - helpmojo - - - - - - - diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java deleted file mode 100644 index 4fc8fc46e6bcc..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.Optional; -import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.glavo.mic.ModuleInfoCompiler; - -/** Compiles the first module-info.java file in the project purely syntactically. 
*/ -public abstract class BaseModuleInfoCompilerPlugin extends AbstractMojo { - protected abstract List getSourceRoots(); - - protected abstract boolean skip(); - - protected abstract String getOutputDirectory(); - - @Override - public void execute() throws MojoExecutionException { - if (skip()) { - getLog().info("Skipping module-info-compiler-maven-plugin"); - return; - } - - Optional moduleInfoFile = findFirstModuleInfo(getSourceRoots()); - if (moduleInfoFile.isPresent()) { - // The compiled module-info.class file goes into target/classes/module-info/main - Path outputDir = Paths.get(getOutputDirectory()); - - outputDir.toFile().mkdirs(); - Path targetPath = outputDir.resolve("module-info.class"); - - // Invoke the compiler, - ModuleInfoCompiler compiler = new ModuleInfoCompiler(); - try (Reader reader = - new InputStreamReader( - Files.newInputStream(moduleInfoFile.get().toPath()), StandardCharsets.UTF_8); - OutputStream output = Files.newOutputStream(targetPath)) { - compiler.compile(reader, output); - getLog().info("Successfully wrote module-info.class file."); - } catch (IOException ex) { - throw new MojoExecutionException("Error compiling module-info.java", ex); - } - } else { - getLog().info("No module-info.java file found. module-info.class file was not generated."); - } - } - - /** Finds the first module-info.java file in the set of source directories. */ - private Optional findFirstModuleInfo(List sourceDirectories) { - if (sourceDirectories == null) { - return Optional.empty(); - } - - return sourceDirectories.stream() - .map(Paths::get) - .map( - sourcePath -> - sourcePath.toFile().listFiles(file -> file.getName().equals("module-info.java"))) - .filter(matchingFiles -> matchingFiles != null && matchingFiles.length != 0) - .map(matchingFiles -> matchingFiles[0]) - .findAny(); - } -} diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java deleted file mode 100644 index e66a475dbf8be..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.util.ArrayList; -import java.util.List; -import org.apache.maven.plugins.annotations.LifecyclePhase; -import org.apache.maven.plugins.annotations.Mojo; -import org.apache.maven.plugins.annotations.Parameter; -import org.apache.maven.project.MavenProject; - -/** A maven plugin for compiler module-info files in main code with JDK8. 
*/ -@Mojo(name = "compile", defaultPhase = LifecyclePhase.COMPILE) -public class ModuleInfoCompilerPlugin extends BaseModuleInfoCompilerPlugin { - - @Parameter( - defaultValue = "${project.compileSourceRoots}", - property = "compileSourceRoots", - required = true) - private final List compileSourceRoots = new ArrayList<>(); - - @Parameter(defaultValue = "false", property = "skip", required = false) - private boolean skip = false; - - @Parameter(defaultValue = "${project}", readonly = true, required = true) - private MavenProject project; - - @Override - protected List getSourceRoots() { - return compileSourceRoots; - } - - @Override - protected boolean skip() { - return skip; - } - - @Override - protected String getOutputDirectory() { - return project.getBuild().getOutputDirectory(); - } -} diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java deleted file mode 100644 index f18ac9faac735..0000000000000 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.maven.plugins; - -import java.util.List; -import org.apache.maven.plugins.annotations.LifecyclePhase; -import org.apache.maven.plugins.annotations.Mojo; -import org.apache.maven.plugins.annotations.Parameter; -import org.apache.maven.project.MavenProject; - -/** A maven plugin for compiler module-info files in unit tests with JDK8. 
*/ -@Mojo(name = "testCompile", defaultPhase = LifecyclePhase.TEST_COMPILE) -public class ModuleInfoTestCompilerPlugin extends BaseModuleInfoCompilerPlugin { - - @Parameter(defaultValue = "false", property = "skip", required = false) - private boolean skip = false; - - @Parameter(defaultValue = "${project}", readonly = true, required = true) - private MavenProject project; - - @Override - protected List getSourceRoots() { - return project.getTestCompileSourceRoots(); - } - - @Override - protected boolean skip() { - return skip; - } - - @Override - protected String getOutputDirectory() { - return project.getBuild().getTestOutputDirectory(); - } -} diff --git a/java/maven/pom.xml b/java/maven/pom.xml deleted file mode 100644 index d342b629358dd..0000000000000 --- a/java/maven/pom.xml +++ /dev/null @@ -1,419 +0,0 @@ - - - - 4.0.0 - - - org.apache - apache - 31 - - - - org.apache.arrow.maven.plugins - arrow-maven-plugins - 18.0.0-SNAPSHOT - pom - - Arrow Maven Plugins - https://arrow.apache.org/ - - - - Developer List - dev-subscribe@arrow.apache.org - dev-unsubscribe@arrow.apache.org - dev@arrow.apache.org - https://lists.apache.org/list.html?dev@arrow.apache.org - - - Commits List - commits-subscribe@arrow.apache.org - commits-unsubscribe@arrow.apache.org - commits@arrow.apache.org - https://lists.apache.org/list.html?commits@arrow.apache.org - - - Issues List - issues-subscribe@arrow.apache.org - issues-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?issues@arrow.apache.org - - - GitHub List - github-subscribe@arrow.apache.org - github-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?github@arrow.apache.org - - - - - module-info-compiler-maven-plugin - - - - scm:git:https://github.com/apache/arrow.git - scm:git:https://github.com/apache/arrow.git - main - https://github.com/apache/arrow/tree/${project.scm.tag} - - - - GitHub - https://github.com/apache/arrow/issues - - - - true - - 1.8 - 1.8 - 3.13.1 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.4 - 3.2.2 - 3.6.3 - 3.5.0 - - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 - - - pl.project13.maven - git-commit-id-plugin - 4.9.10 - - - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 - - - org.codehaus.mojo - versions-maven-plugin - 2.17.0 - - - - - - org.apache.rat - apache-rat-plugin - - false - - **/dependency-reduced-pom.xml - **/*.log - **/*.css - **/*.js - **/*.md - **/*.eps - **/*.json - **/*.seq - **/*.parquet - **/*.sql - **/arrow-git.properties - **/*.csv - **/*.csvh - **/*.csvh-test - **/*.tsv - **/*.txt - **/*.ssv - **/arrow-*.conf - **/.buildpath - **/*.proto - **/*.fmpp - **/target/** - **/*.tdd - **/*.project - **/TAGS - **/*.checkstyle - **/.classpath - **/.factorypath - **/.settings/** - .*/** - **/*.patch - **/*.pb.cc - **/*.pb.h - **/*.linux - **/client/build/** - **/*.tbl - **/*.iml - **/flight.properties - **/*.idea/** - - - - - rat-checks - - check - - validate - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - **/logging.properties - **/logback-test.xml - **/logback.out.xml - **/logback.xml - - - - org.apache.arrow - ${username} - https://arrow.apache.org/ - - - - - - - test-jar - - - true - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - 2048m - true - - - - maven-enforcer-plugin - - - avoid_bad_dependencies - - enforce - - verify - - - - - commons-logging - javax.servlet:servlet-api - org.mortbay.jetty:servlet-api - org.mortbay.jetty:servlet-api-2.5 - log4j:log4j - - - - - - - - - pl.project13.maven - git-commit-id-plugin - - dd.MM.yyyy 
'@' HH:mm:ss z - false - false - true - false - - false - false - 7 - -dirty - true - - - - - for-jars - - revision - - true - - target/classes/arrow-git.properties - - - - for-source-tarball - - revision - - false - - ./arrow-git.properties - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - ../dev/checkstyle/checkstyle.xml - ../dev/license/asf-java.license - ../dev/checkstyle/suppressions.xml - true - UTF-8 - true - ${checkstyle.failOnViolation} - ${checkstyle.failOnViolation} - warning - xml - ${project.build.directory}/test/checkstyle-errors.xml - false - - - - com.puppycrawl.tools - checkstyle - 8.29 - - - org.slf4j - jcl-over-slf4j - 2.0.13 - - - - - validate - - check - - validate - - - - - org.cyclonedx - cyclonedx-maven-plugin - - - - makeBom - - package - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license - (<configuration|<project) - - - - - - 1.7 - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license - package - - - - - - spotless-check - - check - - - - - - - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - - - - - apache-release - - - - org.apache.maven.plugins - maven-assembly-plugin - - - source-release-assembly - - - true - - - - - - - - - diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 95ef16aaa1cfe..ce78fc479232a 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -51,10 +51,35 @@ under the License. + + org.apache.maven.plugins + maven-compiler-plugin + + + -Xmaxerrs + + 10000 + -Xmaxwarns + 10000 + -AskipDefs=.*Test + + -AatfDoNotCache + + + + + org.checkerframework + checker + ${checker.framework.version} + + + + org.apache.maven.plugins maven-surefire-plugin + --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED **/TestOpens.java @@ -65,27 +90,6 @@ under the License. - - jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - **/TestOpens.java - - - - - - opens-tests @@ -118,40 +122,5 @@ under the License. 
- - - checkerframework-jdk11+ - - [11,] - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Xmaxerrs - - 10000 - -Xmaxwarns - 10000 - -AskipDefs=.*Test - - -AatfDoNotCache - - - - - org.checkerframework - checker - ${checker.framework.version} - - - - - - - diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 52fcb52d014a5..e2a07626c386f 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -22,7 +22,10 @@ exports org.apache.arrow.memory.util.hash; exports org.apache.arrow.util; + requires java.compiler; requires transitive jdk.unsupported; requires jsr305; + requires static org.checkerframework.checker.qual; + requires static org.immutables.value.annotations; requires org.slf4j; } diff --git a/java/pom.xml b/java/pom.xml index 4ce0c1981d295..4228496ef682f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -65,7 +65,6 @@ under the License. - maven bom format memory @@ -98,7 +97,7 @@ under the License. 2.0.13 33.2.1-jre 4.1.110.Final - 1.63.0 + 1.65.0 3.25.1 2.17.2 3.4.0 @@ -115,8 +114,10 @@ under the License. none -Xdoclint:none - 1.8 - 1.8 + 11 + 11 + 11 + 11 3.12.0 3.2.5 0.16.1 @@ -298,8 +299,6 @@ under the License. maven-compiler-plugin true - **/module-info.java - **/module-info.java false @@ -313,6 +312,7 @@ under the License. maven-surefire-plugin + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED true true ${forkCount} @@ -325,11 +325,13 @@ under the License. which in turn can cause OOM. --> 1048576 + false maven-failsafe-plugin + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.build.directory} true @@ -444,13 +446,9 @@ under the License. **/module-info.java + arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance - - org.apache.arrow.maven.plugins - module-info-compiler-maven-plugin - ${project.version} - com.gradle develocity-maven-extension @@ -491,6 +489,7 @@ under the License. com.google.protobuf:protoc:${dep.protobuf-bom.version}:exe:${os.detected.classifier} grpc-java io.grpc:protoc-gen-grpc-java:${dep.grpc-bom.version}:exe:${os.detected.classifier} + @generated=omit @@ -779,24 +778,6 @@ under the License. - - org.apache.arrow.maven.plugins - module-info-compiler-maven-plugin - - - default-compile - - compile - - - - default-testCompile - - testCompile - - - - org.apache.maven.plugins maven-project-info-reports-plugin @@ -856,6 +837,7 @@ under the License. **/module-info.java + arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance @@ -917,56 +899,13 @@ under the License. - error-prone-jdk8 + error-prone - 1.8 - - !m2e.version - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -XDcompilePolicy=simple - -Xplugin:ErrorProne - -J-Xbootclasspath/p:${settings.localRepository}/com/google/errorprone/javac/${errorprone.javac.version}/javac-${errorprone.javac.version}.jar - - - - com.google.errorprone - error_prone_core - - 2.10.0 - - - - - - - - - - error-prone-jdk11+ - - [11,] !m2e.version @@ -1003,30 +942,6 @@ under the License. 
-
- jdk11+
-
- [11,]
-
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
-
- --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED
-
-
-
- org.apache.maven.plugins
- maven-failsafe-plugin
-
- --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED
-
-
-
-
-
-
 code-coverage
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index b69d24786cb14..9c52e21402bcd 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -97,19 +97,29 @@ under the License.
- maven-assembly-plugin
-
-
- jar-with-dependencies
-
-
+ maven-shade-plugin
 make-assembly
- single
+ shade
 package
+
+ false
+ true
+ jar-with-dependencies
+
+
+
+ **/module-info.class
+
+
+
+
+
+
+
diff --git a/java/tools/src/main/java/module-info.java b/java/tools/src/main/java/module-info.java
index 6b4329eb84f2a..4f0817f9a11f6 100644
--- a/java/tools/src/main/java/module-info.java
+++ b/java/tools/src/main/java/module-info.java
@@ -19,9 +19,9 @@
   exports org.apache.arrow.tools;
 
   requires com.fasterxml.jackson.databind;
-  requires com.google.common;
   requires org.apache.arrow.compression;
   requires org.apache.arrow.memory.core;
   requires org.apache.arrow.vector;
+  requires org.apache.commons.cli;
   requires org.slf4j;
 }

From 0ea00f4b7ded95757b6651a1d58a525a7eb8866d Mon Sep 17 00:00:00 2001
From: Hyunseok Seo
Date: Thu, 18 Jul 2024 06:24:42 +0900
Subject: [PATCH 088/122] GH-43293: [Docs] Update code block for Installing Java Modules (#43295)

### Rationale for this change

Fix incorrect code block formatting in the implementations (Java) documentation for Installing Java Modules.

### What changes are included in this PR?

- Fixed code block.

### Are these changes tested?

Yes. I have verified the changes by building the documentation.

### Are there any user-facing changes?

Yes. The updated documentation will be visible to users.

* GitHub Issue: #43293

Authored-by: Hyunseok Seo
Signed-off-by: Sutou Kouhei
---
 docs/source/java/install.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst
index 3bdd416b8e792..95307c8c1c5fd 100644
--- a/docs/source/java/install.rst
+++ b/docs/source/java/install.rst
@@ -67,6 +67,7 @@ org.apache.arrow.flight.core does not read unnamed module ...``
 Finally, if you are using arrow-dataset, you'll also need to report that JDK internals
 need to be exposed. Modifying the command above for arrow-memory:
+
 .. code-block:: shell
 
     # Directly on the command line

From cf9f3db95cac418e149a1e9fffef310fa0decbaf Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Wed, 17 Jul 2024 17:48:58 -0400
Subject: [PATCH 089/122] GH-43276: [Go][Parquet] Make DeltaBitPacking Encoders/Decoders Generic (#43279)

### Rationale for this change

Fixes: #43276

The slice of `miniBlockValues` in the decoder was getting reset between calls to `Decode`, while the remaining internal state of the decoder was persisted. By making the decoder generic, it became possible to bring `miniBlockValues` into the base decoder.

The encoder was also made generic for consistency. By avoiding reflection, encoding performance is improved (from around `700 MB/s` before to about `1100 MB/s` after on my machine).

### What changes are included in this PR?

- Passing test case reproducing the reported bug.
- Benchmarks for `DeltaBinaryPacked`
- Refactor of `deltaBitPackDecoder` and `deltaBitPackEncoder` to make them generic.

### Are these changes tested?

Yes

### Are there any user-facing changes?
The DeltaBitPackDecoders should no longer panic when decoding multiple batches from a single page.

* GitHub Issue: #43276

Authored-by: Joel Lubinitsky
Signed-off-by: Joel Lubinitsky
---
 go/parquet/file/file_reader_test.go           |  62 +++++
 .../internal/encoding/delta_bit_packing.go    | 221 +++++-------------
 .../internal/encoding/delta_byte_array.go     |  13 +-
 .../encoding/delta_length_byte_array.go       |   6 +-
 .../encoding/encoding_benchmarks_test.go      |  45 ++++
 .../internal/encoding/typed_encoder.gen.go    |  27 +--
 .../encoding/typed_encoder.gen.go.tmpl        |  15 +-
 7 files changed, 197 insertions(+), 192 deletions(-)

diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go
index 7d20bbe1006f8..547ec475c2720 100644
--- a/go/parquet/file/file_reader_test.go
+++ b/go/parquet/file/file_reader_test.go
@@ -18,6 +18,7 @@ package file_test
 
 import (
 	"bytes"
+	"context"
 	"crypto/rand"
 	"encoding/binary"
 	"fmt"
@@ -26,6 +27,8 @@ import (
 	"path"
 	"testing"
 
+	"github.com/apache/arrow/go/v18/arrow"
+	"github.com/apache/arrow/go/v18/arrow/array"
 	"github.com/apache/arrow/go/v18/arrow/memory"
 	"github.com/apache/arrow/go/v18/internal/utils"
 	"github.com/apache/arrow/go/v18/parquet"
@@ -35,6 +38,7 @@ import (
 	format "github.com/apache/arrow/go/v18/parquet/internal/gen-go/parquet"
 	"github.com/apache/arrow/go/v18/parquet/internal/thrift"
 	"github.com/apache/arrow/go/v18/parquet/metadata"
+	"github.com/apache/arrow/go/v18/parquet/pqarrow"
 	"github.com/apache/arrow/go/v18/parquet/schema"
 	libthrift "github.com/apache/thrift/lib/go/thrift"
 	"github.com/stretchr/testify/assert"
@@ -582,3 +586,61 @@ func TestByteStreamSplitEncodingFileRead(t *testing.T) {
 		})
 	}
 }
+
+func TestDeltaBinaryPackedMultipleBatches(t *testing.T) {
+	size := 10
+	batchSize := size / 2 // write 2 batches
+
+	// Define the schema for the test data
+	fields := []arrow.Field{
+		{Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
+	}
+	schema := arrow.NewSchema(fields, nil)
+
+	// Create a record batch with the test data
+	b := array.NewRecordBuilder(memory.DefaultAllocator, schema)
+	defer b.Release()
+
+	for i := 0; i < size; i++ {
+		b.Field(0).(*array.Int64Builder).Append(int64(i))
+	}
+	rec := b.NewRecord()
+	defer rec.Release()
+
+	// Write the data to Parquet using the file writer
+	props := parquet.NewWriterProperties(
+		parquet.WithDictionaryDefault(false),
+		parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked))
+	writerProps := pqarrow.DefaultWriterProps()
+
+	var buf bytes.Buffer
+	pw, err := pqarrow.NewFileWriter(schema, &buf, props, writerProps)
+	require.NoError(t, err)
+	require.NoError(t, pw.Write(rec))
+	require.NoError(t, pw.Close())
+
+	// Read the data back from the Parquet file
+	reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
+	require.NoError(t, err)
+	defer reader.Close()
+
+	pr, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{BatchSize: int64(batchSize)}, memory.DefaultAllocator)
+	require.NoError(t, err)
+
+	rr, err := pr.GetRecordReader(context.Background(), nil, nil)
+	require.NoError(t, err)
+
+	totalRows := 0
+	for rr.Next() {
+		rec := rr.Record()
+		for i := 0; i < int(rec.NumRows()); i++ {
+			col := rec.Column(0).(*array.Int64)
+
+			val := col.Value(i)
+			require.Equal(t, val, int64(totalRows+i))
+		}
+		totalRows += int(rec.NumRows())
+	}
+
+	require.Equalf(t, size, totalRows, "Expected %d rows, but got %d rows", size, totalRows)
+}
diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go
index ca1ed14511f43..ac91953a7f903 100644
--- a/go/parquet/internal/encoding/delta_bit_packing.go
+++ b/go/parquet/internal/encoding/delta_bit_packing.go
@@ -19,9 +19,9 @@ package encoding
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"math"
 	"math/bits"
-	"reflect"
 
 	"github.com/apache/arrow/go/v18/arrow"
 	"github.com/apache/arrow/go/v18/arrow/memory"
@@ -32,7 +32,7 @@ import (
 
 // see the deltaBitPack encoder for a description of the encoding format that is
 // used for delta-bitpacking.
-type deltaBitPackDecoder struct {
+type deltaBitPackDecoder[T int32 | int64] struct {
 	decoder
 
 	mem memory.Allocator
@@ -52,18 +52,20 @@ type deltaBitPackDecoder struct {
 
 	totalValues uint64
 	lastVal     int64
+
+	miniBlockValues []T
 }
 
 // returns the number of bytes read so far
-func (d *deltaBitPackDecoder) bytesRead() int64 {
+func (d *deltaBitPackDecoder[T]) bytesRead() int64 {
 	return d.bitdecoder.CurOffset()
 }
 
-func (d *deltaBitPackDecoder) Allocator() memory.Allocator { return d.mem }
+func (d *deltaBitPackDecoder[T]) Allocator() memory.Allocator { return d.mem }
 
 // SetData sets the bytes and the expected number of values to decode
 // into the decoder, updating the decoder and allowing it to be reused.
-func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error {
+func (d *deltaBitPackDecoder[T]) SetData(nvalues int, data []byte) error {
 	// set our data into the underlying decoder for the type
 	if err := d.decoder.SetData(nvalues, data); err != nil {
 		return err
@@ -103,7 +105,7 @@ func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error {
 }
 
 // initialize a block to decode
-func (d *deltaBitPackDecoder) initBlock() error {
+func (d *deltaBitPackDecoder[T]) initBlock() error {
 	// first we grab the min delta value that we'll start from
 	var ok bool
 	if d.minDelta, ok = d.bitdecoder.GetZigZagVlqInt(); !ok {
@@ -126,16 +128,9 @@ func (d *deltaBitPackDecoder) initBlock() error {
 	return nil
 }
 
-// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm.
-type DeltaBitPackInt32Decoder struct {
-	*deltaBitPackDecoder
-
-	miniBlockValues []int32
-}
-
-func (d *DeltaBitPackInt32Decoder) unpackNextMini() error {
+func (d *deltaBitPackDecoder[T]) unpackNextMini() error {
 	if d.miniBlockValues == nil {
-		d.miniBlockValues = make([]int32, 0, int(d.valsPerMini))
+		d.miniBlockValues = make([]T, 0, int(d.valsPerMini))
 	} else {
 		d.miniBlockValues = d.miniBlockValues[:0]
 	}
@@ -149,7 +144,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error {
 		}
 
 		d.lastVal += int64(delta) + int64(d.minDelta)
-		d.miniBlockValues = append(d.miniBlockValues, int32(d.lastVal))
+		d.miniBlockValues = append(d.miniBlockValues, T(d.lastVal))
 	}
 	d.miniBlockIdx++
 	return nil
@@ -157,15 +152,15 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error {
 
 // Decode retrieves min(remaining values, len(out)) values from the data and returns the number
 // of values actually decoded and any errors encountered.
-func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) {
-	max := shared_utils.Min(len(out), int(d.totalValues))
+func (d *deltaBitPackDecoder[T]) Decode(out []T) (int, error) {
+	max := shared_utils.Min(len(out), int(d.nvals))
 	if max == 0 {
 		return 0, nil
 	}
 
 	out = out[:max]
 	if !d.usedFirst { // starting value to calculate deltas against
-		out[0] = int32(d.lastVal)
+		out[0] = T(d.lastVal)
 		out = out[1:]
 		d.usedFirst = true
 	}
 
@@ -198,7 +193,7 @@ func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) {
 }
 
 // DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap
-func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+func (d *deltaBitPackDecoder[T]) DecodeSpaced(out []T, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
 	toread := len(out) - nullCount
 	values, err := d.Decode(out[:toread])
 	if err != nil {
@@ -211,101 +206,23 @@ func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out []int32, nullCount int, vali
 	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
 }
 
-// Type returns the physical parquet type that this decoder decodes, in this case Int32
-func (DeltaBitPackInt32Decoder) Type() parquet.Type {
-	return parquet.Types.Int32
-}
-
-// DeltaBitPackInt64Decoder decodes a delta bit packed int64 column of data.
-type DeltaBitPackInt64Decoder struct {
-	*deltaBitPackDecoder
-
-	miniBlockValues []int64
-}
-
-func (d *DeltaBitPackInt64Decoder) unpackNextMini() error {
-	if d.miniBlockValues == nil {
-		d.miniBlockValues = make([]int64, 0, int(d.valsPerMini))
-	} else {
-		d.miniBlockValues = d.miniBlockValues[:0]
-	}
-
-	d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)]
-	d.currentMiniBlockVals = d.valsPerMini
-
-	for j := 0; j < int(d.valsPerMini); j++ {
-		delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth))
-		if !ok {
-			return errors.New("parquet: eof exception")
-		}
-
-		d.lastVal += int64(delta) + d.minDelta
-		d.miniBlockValues = append(d.miniBlockValues, d.lastVal)
-	}
-	d.miniBlockIdx++
-	return nil
-}
-
-// Decode retrieves min(remaining values, len(out)) values from the data and returns the number
-// of values actually decoded and any errors encountered.
-func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) {
-	max := shared_utils.Min(len(out), d.nvals)
-	if max == 0 {
-		return 0, nil
-	}
-
-	out = out[:max]
-	if !d.usedFirst {
-		out[0] = d.lastVal
-		out = out[1:]
-		d.usedFirst = true
-	}
-
-	var err error
-	for len(out) > 0 {
-		if d.currentBlockVals == 0 {
-			err = d.initBlock()
-			if err != nil {
-				return 0, err
-			}
-		}
-		if d.currentMiniBlockVals == 0 {
-			err = d.unpackNextMini()
-		}
-
-		if err != nil {
-			return 0, err
-		}
-
-		start := int(d.valsPerMini - d.currentMiniBlockVals)
-		numCopied := copy(out, d.miniBlockValues[start:])
-
-		out = out[numCopied:]
-		d.currentBlockVals -= uint32(numCopied)
-		d.currentMiniBlockVals -= uint32(numCopied)
+// Type returns the underlying physical type this decoder works with
+func (dec *deltaBitPackDecoder[T]) Type() parquet.Type {
+	switch v := any(dec).(type) {
+	case *deltaBitPackDecoder[int32]:
+		return parquet.Types.Int32
+	case *deltaBitPackDecoder[int64]:
+		return parquet.Types.Int64
+	default:
+		panic(fmt.Sprintf("deltaBitPackDecoder is not supported for type: %T", v))
 	}
-	d.nvals -= max
-	return max, nil
-}
-
-// Type returns the physical parquet type that this decoder decodes, in this case Int64
-func (DeltaBitPackInt64Decoder) Type() parquet.Type {
-	return parquet.Types.Int64
 }
 
-// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap
-func (d DeltaBitPackInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
-	toread := len(out) - nullCount
-	values, err := d.Decode(out[:toread])
-	if err != nil {
-		return values, err
-	}
-	if values != toread {
-		return values, errors.New("parquet: number of values / definition levels read did not match")
-	}
+// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm.
+type DeltaBitPackInt32Decoder = deltaBitPackDecoder[int32]
 
-	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
-}
+// DeltaBitPackInt64Decoder decodes Int64 values which are packed using the Delta BitPacking algorithm.
+type DeltaBitPackInt64Decoder = deltaBitPackDecoder[int64]
 
 const (
 	// block size must be a multiple of 128
@@ -333,7 +250,7 @@ const (
 //
 // Sets aside bytes at the start of the internal buffer where the header will be written,
 // and only writes the header when FlushValues is called before returning it.
-type deltaBitPackEncoder struct {
+type deltaBitPackEncoder[T int32 | int64] struct {
 	encoder
 
 	bitWriter *utils.BitWriter
@@ -348,7 +265,7 @@ type deltaBitPackEncoder struct {
 }
 
 // flushBlock flushes out a finished block for writing to the underlying encoder
-func (enc *deltaBitPackEncoder) flushBlock() {
+func (enc *deltaBitPackEncoder[T]) flushBlock() {
 	if len(enc.deltas) == 0 {
 		return
 	}
@@ -400,9 +317,8 @@ func (enc *deltaBitPackEncoder) flushBlock() {
 
 // putInternal is the implementation for actually writing data which must be
 // integral data as int, int8, int32, or int64.
-func (enc *deltaBitPackEncoder) putInternal(data interface{}) {
-	v := reflect.ValueOf(data)
-	if v.Len() == 0 {
+func (enc *deltaBitPackEncoder[T]) Put(in []T) {
+	if len(in) == 0 {
 		return
 	}
 
@@ -412,16 +328,16 @@ func (enc *deltaBitPackEncoder) putInternal(data interface{}) {
 		enc.numMiniBlocks = defaultNumMiniBlocks
 		enc.miniBlockSize = defaultNumValuesPerMini
 
-		enc.firstVal = v.Index(0).Int()
+		enc.firstVal = int64(in[0])
 		enc.currentVal = enc.firstVal
 		idx = 1
 
 		enc.bitWriter = utils.NewBitWriter(enc.sink)
 	}
 
-	enc.totalVals += uint64(v.Len())
-	for ; idx < v.Len(); idx++ {
-		val := v.Index(idx).Int()
+	enc.totalVals += uint64(len(in))
+	for ; idx < len(in); idx++ {
+		val := int64(in[idx])
 		enc.deltas = append(enc.deltas, val-enc.currentVal)
 		enc.currentVal = val
 		if len(enc.deltas) == int(enc.blockSize) {
@@ -432,7 +348,7 @@ func (enc *deltaBitPackEncoder) putInternal(data interface{}) {
 
 // FlushValues flushes any remaining data and returns the finished encoded buffer
 // or returns nil and any error encountered during flushing.
-func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) {
+func (enc *deltaBitPackEncoder[T]) FlushValues() (Buffer, error) {
 	if enc.bitWriter != nil {
 		// write any remaining values
 		enc.flushBlock()
@@ -465,7 +381,7 @@ func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) {
 }
 
 // EstimatedDataEncodedSize returns the current amount of data actually flushed out and written
-func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 {
+func (enc *deltaBitPackEncoder[T]) EstimatedDataEncodedSize() int64 {
 	if enc.bitWriter == nil {
 		return 0
 	}
@@ -473,56 +389,33 @@ func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 {
 	return int64(enc.bitWriter.Written())
 }
 
-// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data.
-type DeltaBitPackInt32Encoder struct {
-	*deltaBitPackEncoder
-}
-
-// Put writes the values from the provided slice of int32 to the encoder
-func (enc DeltaBitPackInt32Encoder) Put(in []int32) {
-	enc.putInternal(in)
-}
-
-// PutSpaced takes a slice of int32 along with a bitmap that describes the nulls and an offset into the bitmap
+// PutSpaced takes a slice of values along with a bitmap that describes the nulls and an offset into the bitmap
 // in order to write spaced data to the encoder.
-func (enc DeltaBitPackInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) {
+func (enc *deltaBitPackEncoder[T]) PutSpaced(in []T, validBits []byte, validBitsOffset int64) {
 	buffer := memory.NewResizableBuffer(enc.mem)
-	buffer.Reserve(arrow.Int32Traits.BytesRequired(len(in)))
+	dt := arrow.GetDataType[T]().(arrow.FixedWidthDataType)
+	buffer.Reserve(dt.Bytes() * len(in))
 	defer buffer.Release()
 
-	data := arrow.Int32Traits.CastFromBytes(buffer.Buf())
+	data := arrow.GetData[T](buffer.Buf())
 	nvalid := spacedCompress(in, data, validBits, validBitsOffset)
 	enc.Put(data[:nvalid])
 }
 
-// Type returns the underlying physical type this encoder works with, in this case Int32
-func (DeltaBitPackInt32Encoder) Type() parquet.Type {
-	return parquet.Types.Int32
-}
-
-// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data.
-type DeltaBitPackInt64Encoder struct {
-	*deltaBitPackEncoder
-}
-
-// Put writes the values from the provided slice of int64 to the encoder
-func (enc DeltaBitPackInt64Encoder) Put(in []int64) {
-	enc.putInternal(in)
+// Type returns the underlying physical type this encoder works with
+func (dec *deltaBitPackEncoder[T]) Type() parquet.Type {
+	switch v := any(dec).(type) {
+	case *deltaBitPackEncoder[int32]:
+		return parquet.Types.Int32
+	case *deltaBitPackEncoder[int64]:
+		return parquet.Types.Int64
+	default:
+		panic(fmt.Sprintf("deltaBitPackEncoder is not supported for type: %T", v))
+	}
 }
 
-// PutSpaced takes a slice of int64 along with a bitmap that describes the nulls and an offset into the bitmap
-// in order to write spaced data to the encoder.
-func (enc DeltaBitPackInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) {
-	buffer := memory.NewResizableBuffer(enc.mem)
-	buffer.Reserve(arrow.Int64Traits.BytesRequired(len(in)))
-	defer buffer.Release()
+// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for Int32 data.
+type DeltaBitPackInt32Encoder = deltaBitPackEncoder[int32]
 
-	data := arrow.Int64Traits.CastFromBytes(buffer.Buf())
-	nvalid := spacedCompress(in, data, validBits, validBitsOffset)
-	enc.Put(data[:nvalid])
-}
-
-// Type returns the underlying physical type this encoder works with, in this case Int64
-func (DeltaBitPackInt64Encoder) Type() parquet.Type {
-	return parquet.Types.Int64
-}
+// DeltaBitPackInt64Encoder is an encoder for the delta bitpacking encoding for Int64 data.
+type DeltaBitPackInt64Encoder = deltaBitPackEncoder[int64]
diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go
index e7990f0dacbe8..62c8d08999972 100644
--- a/go/parquet/internal/encoding/delta_byte_array.go
+++ b/go/parquet/internal/encoding/delta_byte_array.go
@@ -53,11 +53,14 @@ func (enc *DeltaByteArrayEncoder) EstimatedDataEncodedSize() int64 {
 
 func (enc *DeltaByteArrayEncoder) initEncoders() {
 	enc.prefixEncoder = &DeltaBitPackInt32Encoder{
-		deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}
+		encoder: newEncoderBase(enc.encoding, nil, enc.mem),
+	}
 	enc.suffixEncoder = &DeltaLengthByteArrayEncoder{
 		newEncoderBase(enc.encoding, nil, enc.mem),
 		&DeltaBitPackInt32Encoder{
-			deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}}
+			encoder: newEncoderBase(enc.encoding, nil, enc.mem),
+		},
+	}
 }
 
 // Type returns the underlying physical type this operates on, in this case ByteArrays only
@@ -160,9 +163,9 @@ func (d *DeltaByteArrayDecoder) Allocator() memory.Allocator { return d.mem }
 // blocks of suffix data in order to initialize the decoder.
 func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error {
 	prefixLenDec := DeltaBitPackInt32Decoder{
-		deltaBitPackDecoder: &deltaBitPackDecoder{
-			decoder: newDecoderBase(d.encoding, d.descr),
-			mem:     d.mem}}
+		decoder: newDecoderBase(d.encoding, d.descr),
+		mem:     d.mem,
+	}
 
 	if err := prefixLenDec.SetData(nvalues, data); err != nil {
 		return err
diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go
index b72960fe438ad..87c48d574ed68 100644
--- a/go/parquet/internal/encoding/delta_length_byte_array.go
+++ b/go/parquet/internal/encoding/delta_length_byte_array.go
@@ -110,9 +110,9 @@ func (d *DeltaLengthByteArrayDecoder) Allocator() memory.Allocator { return d.me
 // followed by the rest of the byte array data immediately after.
 func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error {
 	dec := DeltaBitPackInt32Decoder{
-		deltaBitPackDecoder: &deltaBitPackDecoder{
-			decoder: newDecoderBase(d.encoding, d.descr),
-			mem:     d.mem}}
+		decoder: newDecoderBase(d.encoding, d.descr),
+		mem:     d.mem,
+	}
 
 	if err := dec.SetData(nvalues, data); err != nil {
 		return err
diff --git a/go/parquet/internal/encoding/encoding_benchmarks_test.go b/go/parquet/internal/encoding/encoding_benchmarks_test.go
index 95c0b3861bc05..2ca414eec6b90 100644
--- a/go/parquet/internal/encoding/encoding_benchmarks_test.go
+++ b/go/parquet/internal/encoding/encoding_benchmarks_test.go
@@ -634,3 +634,48 @@ func BenchmarkByteStreamSplitDecodingFixedLenByteArray(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkDeltaBinaryPackedEncodingInt32(b *testing.B) {
+	for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 {
+		b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) {
+			values := make([]int32, sz)
+			for idx := range values {
+				values[idx] = 64
+			}
+			encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked,
+				false, nil, memory.DefaultAllocator).(encoding.Int32Encoder)
+			b.ResetTimer()
+			b.SetBytes(int64(len(values) * arrow.Int32SizeBytes))
+			for n := 0; n < b.N; n++ {
+				encoder.Put(values)
+				buf, _ := encoder.FlushValues()
+				buf.Release()
+			}
+		})
+	}
+}
+
+func BenchmarkDeltaBinaryPackedDecodingInt32(b *testing.B) {
+	for sz := MINSIZE; sz < MAXSIZE+1; sz *= 2 {
+		b.Run(fmt.Sprintf("len %d", sz), func(b *testing.B) {
+			output := make([]int32, sz)
+			values := make([]int32, sz)
+			for idx := range values {
+				values[idx] = 64
+			}
+			encoder := encoding.NewEncoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked,
+				false, nil, memory.DefaultAllocator).(encoding.Int32Encoder)
+			encoder.Put(values)
+			buf, _ := encoder.FlushValues()
+			defer buf.Release()
+
+			decoder := encoding.NewDecoder(parquet.Types.Int32, parquet.Encodings.DeltaBinaryPacked, nil, memory.DefaultAllocator)
+			b.ResetTimer()
+			b.SetBytes(int64(len(values) * arrow.Int32SizeBytes))
+			for n := 0; n < b.N; n++ {
+				decoder.SetData(sz, buf.Bytes())
+				decoder.(encoding.Int32Decoder).Decode(output)
+			}
+		})
+	}
+}
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go
index 3a960e2c62332..e67c976adc042 100644
--- a/go/parquet/internal/encoding/typed_encoder.gen.go
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go
@@ -86,8 +86,9 @@ func (int32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema
 	case format.Encoding_PLAIN:
 		return &PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)}
 	case format.Encoding_DELTA_BINARY_PACKED:
-		return DeltaBitPackInt32Encoder{&deltaBitPackEncoder{
-			encoder: newEncoderBase(e, descr, mem)}}
+		return &DeltaBitPackInt32Encoder{
+			encoder: newEncoderBase(e, descr, mem),
+		}
 	case format.Encoding_BYTE_STREAM_SPLIT:
 		return &ByteStreamSplitInt32Encoder{PlainInt32Encoder: PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)}}
 	default:
@@ -118,10 +119,9 @@ func (int32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD
 			mem = memory.DefaultAllocator
 		}
 		return &DeltaBitPackInt32Decoder{
-			deltaBitPackDecoder: &deltaBitPackDecoder{
-				decoder: newDecoderBase(format.Encoding(e), descr),
-				mem:     mem,
-			}}
+			decoder: newDecoderBase(format.Encoding(e), descr),
+			mem:     mem,
+		}
 	case parquet.Encodings.ByteStreamSplit:
 		return &ByteStreamSplitInt32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
 	default:
@@ -327,8 +327,9 @@ func (int64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema
 	case format.Encoding_PLAIN:
 		return &PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)}
 	case format.Encoding_DELTA_BINARY_PACKED:
-		return DeltaBitPackInt64Encoder{&deltaBitPackEncoder{
-			encoder: newEncoderBase(e, descr, mem)}}
+		return &DeltaBitPackInt64Encoder{
+			encoder: newEncoderBase(e, descr, mem),
+		}
 	case format.Encoding_BYTE_STREAM_SPLIT:
 		return &ByteStreamSplitInt64Encoder{PlainInt64Encoder: PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)}}
 	default:
@@ -359,10 +360,9 @@ func (int64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useD
 			mem = memory.DefaultAllocator
 		}
 		return &DeltaBitPackInt64Decoder{
-			deltaBitPackDecoder: &deltaBitPackDecoder{
-				decoder: newDecoderBase(format.Encoding(e), descr),
-				mem:     mem,
-			}}
+			decoder: newDecoderBase(format.Encoding(e), descr),
+			mem:     mem,
+		}
 	case parquet.Encodings.ByteStreamSplit:
 		return &ByteStreamSplitInt64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
 	default:
@@ -1306,7 +1306,8 @@ func (byteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *sc
 		return &DeltaLengthByteArrayEncoder{
 			encoder: newEncoderBase(e, descr, mem),
 			lengthEncoder: &DeltaBitPackInt32Encoder{
-				&deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}},
+				encoder: newEncoderBase(e, descr, mem),
+			},
 		}
 	case format.Encoding_DELTA_BYTE_ARRAY:
 		return &DeltaByteArrayEncoder{
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
index 079c1aad6bd3f..601d90712baa6 100644
--- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
@@ -79,15 +79,17 @@ func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *s
{{- end}}
{{- if or (eq .Name "Int32") (eq .Name "Int64")}}
 	case format.Encoding_DELTA_BINARY_PACKED:
-		return DeltaBitPack{{.Name}}Encoder{&deltaBitPackEncoder{
-			encoder: newEncoderBase(e, descr, mem)}}
+		return &DeltaBitPack{{.Name}}Encoder{
+			encoder: newEncoderBase(e, descr, mem),
+		}
{{- end}}
{{- if eq .Name "ByteArray"}}
 	case format.Encoding_DELTA_LENGTH_BYTE_ARRAY:
 		return &DeltaLengthByteArrayEncoder{
 			encoder: newEncoderBase(e, descr, mem),
 			lengthEncoder: &DeltaBitPackInt32Encoder{
-				&deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}},
+				encoder: newEncoderBase(e, descr, mem),
+			},
 		}
 	case format.Encoding_DELTA_BYTE_ARRAY:
 		return &DeltaByteArrayEncoder{
@@ -135,10 +137,9 @@ func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column,
 			mem = memory.DefaultAllocator
 		}
 		return &DeltaBitPack{{.Name}}Decoder{
-			deltaBitPackDecoder: &deltaBitPackDecoder{
-				decoder: newDecoderBase(format.Encoding(e), descr),
-				mem:     mem,
-			}}
+			decoder: newDecoderBase(format.Encoding(e), descr),
+			mem:     mem,
+		}
{{- end}}
{{- if eq .Name "ByteArray"}}
 	case parquet.Encodings.DeltaLengthByteArray:

From b382156cfc4288584394b95cebccb425a93887d9 Mon Sep 17 00:00:00 2001
From: Dane Pitkin
Date: Wed, 17 Jul 2024 18:34:01 -0400
Subject: [PATCH 090/122] GH-43314: [CI][Java] Delete arrow-maven-plugins from release script (#43313)

We missed this when merging Java 8 deprecation.

Authored-by: Dane Pitkin
Signed-off-by: Dane Pitkin
---
 dev/release/utils-prepare.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh
index 760a7f404a74d..6ba8b22a06e89 100644
--- a/dev/release/utils-prepare.sh
+++ b/dev/release/utils-prepare.sh
@@ -88,7 +88,6 @@ update_versions() {
     # versions-maven-plugin:set-scm-tag does not update the whole reactor. Invoking separately
     mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-java-root
     mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-bom
-    mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-maven-plugins
   fi
   git add "pom.xml"
   git add "**/pom.xml"

From 0bae073d1abe439d113cc12e06e7ada886a2f2fd Mon Sep 17 00:00:00 2001
From: abandy
Date: Wed, 17 Jul 2024 20:52:59 -0400
Subject: [PATCH 091/122] GH-43168: [Swift] Add buffer and array builders for Struct type (#43171)

### Rationale for this change

The struct type requires additions/modifications to builders for buffers and arrays. This is needed in order to match functionality with the other supported types.

### What changes are included in this PR?

Updates to buffer and array builders for Struct type.

### Are these changes tested?

Yes, an Array test has been added for the Struct type using the Struct builders.

**This PR includes breaking changes to public APIs.** The init method for ArrowArray currently does not throw. This PR will change this behavior and allow the init method to throw. External code that calls init will need to be updated to catch any errors.
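For illustration only (this sketch is editorial, not part of the patch): after this change, Swift callers that construct arrays directly must handle the thrown error. The names below (`FixedArray`, `ArrowData`, `ArrowError`, `length`) come from the diff that follows; the surrounding setup is assumed.

```swift
// Hypothetical migration sketch, assuming an existing `arrowData: ArrowData` value.
// Before this change: let array = FixedArray<Int32>(arrowData)
// After this change the initializer can throw, so wrap it in do/catch
// (or use `try` directly inside another throwing context):
do {
    let array = try FixedArray<Int32>(arrowData)
    print("length:", array.length)
} catch let error as ArrowError {
    print("failed to construct array: \(error)")
}
```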
* GitHub Issue: #43168

Authored-by: Alva Bandy
Signed-off-by: Sutou Kouhei
---
 swift/Arrow/Sources/Arrow/ArrowArray.swift    |  46 ++++---
 .../Sources/Arrow/ArrowArrayBuilder.swift     | 118 +++++++++++++++++-
 .../Sources/Arrow/ArrowBufferBuilder.swift    |  72 +++++++++--
 .../Sources/Arrow/ArrowReaderHelper.swift     |  16 +--
 swift/Arrow/Tests/ArrowTests/ArrayTests.swift |  61 +++++++++
 5 files changed, 269 insertions(+), 44 deletions(-)

diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift
index b0f20ee06c2e4..4fc1b8b9fc71c 100644
--- a/swift/Arrow/Sources/Arrow/ArrowArray.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift
@@ -78,41 +78,41 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
         _ arrowType: ArrowType,
         with: ArrowData) throws -> ArrowArrayHolder {
         switch arrowType.id {
         case .int8:
-            return ArrowArrayHolderImpl(FixedArray<Int8>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Int8>(with))
         case .int16:
-            return ArrowArrayHolderImpl(FixedArray<Int16>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Int16>(with))
         case .int32:
-            return ArrowArrayHolderImpl(FixedArray<Int32>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Int32>(with))
         case .int64:
-            return ArrowArrayHolderImpl(FixedArray<Int64>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Int64>(with))
         case .uint8:
-            return ArrowArrayHolderImpl(FixedArray<UInt8>(with))
+            return try ArrowArrayHolderImpl(FixedArray<UInt8>(with))
         case .uint16:
-            return ArrowArrayHolderImpl(FixedArray<UInt16>(with))
+            return try ArrowArrayHolderImpl(FixedArray<UInt16>(with))
         case .uint32:
-            return ArrowArrayHolderImpl(FixedArray<UInt32>(with))
+            return try ArrowArrayHolderImpl(FixedArray<UInt32>(with))
         case .uint64:
-            return ArrowArrayHolderImpl(FixedArray<UInt64>(with))
+            return try ArrowArrayHolderImpl(FixedArray<UInt64>(with))
         case .double:
-            return ArrowArrayHolderImpl(FixedArray<Double>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Double>(with))
         case .float:
-            return ArrowArrayHolderImpl(FixedArray<Float>(with))
+            return try ArrowArrayHolderImpl(FixedArray<Float>(with))
         case .date32:
-            return ArrowArrayHolderImpl(Date32Array(with))
+            return try ArrowArrayHolderImpl(Date32Array(with))
         case .date64:
-            return ArrowArrayHolderImpl(Date64Array(with))
+            return try ArrowArrayHolderImpl(Date64Array(with))
         case .time32:
-            return ArrowArrayHolderImpl(Time32Array(with))
+            return try ArrowArrayHolderImpl(Time32Array(with))
         case .time64:
-            return ArrowArrayHolderImpl(Time64Array(with))
+            return try ArrowArrayHolderImpl(Time64Array(with))
         case .string:
-            return ArrowArrayHolderImpl(StringArray(with))
+            return try ArrowArrayHolderImpl(StringArray(with))
         case .boolean:
-            return ArrowArrayHolderImpl(BoolArray(with))
+            return try ArrowArrayHolderImpl(BoolArray(with))
         case .binary:
-            return ArrowArrayHolderImpl(BinaryArray(with))
+            return try ArrowArrayHolderImpl(BinaryArray(with))
         case .strct:
-            return ArrowArrayHolderImpl(StructArray(with))
+            return try ArrowArrayHolderImpl(StructArray(with))
         default:
             throw ArrowError.invalid("Array not found for type: \(arrowType)")
         }
@@ -125,7 +125,7 @@ public class ArrowArray<T>: AsString, AnyArray {
     public var nullCount: UInt {return self.arrowData.nullCount}
     public var length: UInt {return self.arrowData.length}
 
-    public required init(_ arrowData: ArrowData) {
+    public required init(_ arrowData: ArrowData) throws {
         self.arrowData = arrowData
     }
 
@@ -277,18 +277,14 @@ public class BinaryArray: ArrowArray<Data> {
 public class StructArray: ArrowArray<[Any?]> {
     public private(set) var arrowFields: [ArrowArrayHolder]?
- public required init(_ arrowData: ArrowData) { - super.init(arrowData) - } - - public func initialize() throws -> StructArray { + public required init(_ arrowData: ArrowData) throws { + try super.init(arrowData) var fields = [ArrowArrayHolder]() for child in arrowData.children { fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child)) } self.arrowFields = fields - return self } public override subscript(_ index: UInt) -> [Any?]? { diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index dc80f52f8ebd2..005cad79daeda 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -58,7 +58,8 @@ public class ArrowArrayBuilder> public func finish() throws -> ArrowArray { let buffers = self.bufferBuilder.finish() let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount) - return U(arrowData) + let array = try U(arrowData) + return array } public func getStride() -> Int { @@ -118,6 +119,55 @@ public class Time64ArrayBuilder: ArrowArrayBuilder, T } } +public class StructArrayBuilder: ArrowArrayBuilder { + let builders: [any ArrowArrayHolderBuilder] + let fields: [ArrowField] + public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws { + self.fields = fields + self.builders = builders + try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) + self.bufferBuilder.initializeTypeInfo(fields) + } + + public init(_ fields: [ArrowField]) throws { + self.fields = fields + var builders = [any ArrowArrayHolderBuilder]() + for field in fields { + builders.append(try ArrowArrayBuilders.loadBuilder(arrowType: field.type)) + } + + self.builders = builders + try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) + } + + public override func append(_ values: [Any?]?) { + self.bufferBuilder.append(values) + if let anyValues = values { + for index in 0.. 
StructArray { + let buffers = self.bufferBuilder.finish() + var childData = [ArrowData]() + for builder in self.builders { + childData.append(try builder.toHolder().array.arrowData) + } + + let arrowData = try ArrowData(self.type, buffers: buffers, + children: childData, nullCount: self.nullCount, + length: self.length) + let structArray = try StructArray(arrowData) + return structArray + } +} + public class ArrowArrayBuilders { public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity _ builderType: Any.Type) throws -> ArrowArrayHolderBuilder { @@ -168,6 +218,72 @@ public class ArrowArrayBuilders { type == Float.self || type == Date.self } + public static func loadStructArrayBuilderForType(_ obj: T) throws -> StructArrayBuilder { + let mirror = Mirror(reflecting: obj) + var builders = [ArrowArrayHolderBuilder]() + var fields = [ArrowField]() + for (property, value) in mirror.children { + guard let propertyName = property else { + continue + } + + let builderType = type(of: value) + let arrowType = ArrowType(ArrowType.infoForType(builderType)) + fields.append(ArrowField(propertyName, type: arrowType, isNullable: true)) + builders.append(try loadBuilder(arrowType: arrowType)) + } + + return try StructArrayBuilder(fields, builders: builders) + } + + public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity + arrowType: ArrowType) throws -> ArrowArrayHolderBuilder { + switch arrowType.id { + case .uint8: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint16: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint32: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint64: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int8: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int16: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int32: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int64: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .double: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .float: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .string: + return try StringArrayBuilder() + case .boolean: + return try BoolArrayBuilder() + case .binary: + return try BinaryArrayBuilder() + case .date32: + return try Date32ArrayBuilder() + case .date64: + return try Date64ArrayBuilder() + case .time32: + guard let timeType = arrowType as? ArrowTypeTime32 else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try Time32ArrayBuilder(timeType.unit) + case .time64: + guard let timeType = arrowType as? 
ArrowTypeTime64 else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try Time64ArrayBuilder(timeType.unit) + default: + throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") + } + } + public static func loadNumberArrayBuilder() throws -> NumberArrayBuilder { let type = T.self if type == Int8.self { diff --git a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift index e4c8036c327d1..47f9c40354b1b 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift @@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder { func finish() -> [ArrowBuffer] } -public class BaseBufferBuilder { - var values: ArrowBuffer +public class BaseBufferBuilder { var nulls: ArrowBuffer - var stride: Int public var offset: UInt = 0 - public var capacity: UInt {return self.values.capacity} + public var capacity: UInt {return self.nulls.capacity} public var length: UInt = 0 public var nullCount: UInt = 0 - init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout.stride) { - self.stride = stride - self.values = values + init(_ nulls: ArrowBuffer) { self.nulls = nulls } @@ -61,7 +57,19 @@ public class BaseBufferBuilder { } } -public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class ValuesBufferBuilder: BaseBufferBuilder { + var values: ArrowBuffer + var stride: Int + public override var capacity: UInt {return self.values.capacity} + + init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout.stride) { + self.stride = stride + self.values = values + super.init(nulls) + } +} + +public class FixedBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = T private let defaultVal: ItemType public required init() throws { @@ -138,7 +146,7 @@ public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { } } -public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class BoolBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = Bool public required init() throws { let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) @@ -190,7 +198,7 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { } } -public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class VariableBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = T var offsets: ArrowBuffer let binaryStride = MemoryLayout.stride @@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder { } } } + +public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { + public typealias ItemType = [Any?] + var info: ArrowNestedType? + public init() throws { + let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) + super.init(nulls) + } + + public func initializeTypeInfo(_ fields: [ArrowField]) { + info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + } + + public func append(_ newValue: [Any?]?) 
{ + let index = UInt(self.length) + self.length += 1 + if length > self.nulls.length { + self.resize(length) + } + + if newValue != nil { + BitUtility.setBit(index + self.offset, buffer: self.nulls) + } else { + self.nullCount += 1 + BitUtility.clearBit(index + self.offset, buffer: self.nulls) + } + } + + public func resize(_ length: UInt) { + if length > self.nulls.length { + let resizeLength = resizeLength(self.nulls) + var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) + self.nulls = nulls + } + } + + public func finish() -> [ArrowBuffer] { + let length = self.length + var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity) + return [nulls] + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift index c701653ecb2c9..22c0672b27eac 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -23,7 +23,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBinary) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(BinaryArray(arrowData))) + return .success(ArrowArrayHolderImpl(try BinaryArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -36,7 +36,7 @@ private func makeStringHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowString) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(StringArray(arrowData))) + return .success(ArrowArrayHolderImpl(try StringArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -51,11 +51,11 @@ private func makeDateHolder(_ field: ArrowField, do { if field.type.id == .date32 { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(Date32Array(arrowData))) + return .success(ArrowArrayHolderImpl(try Date32Array(arrowData))) } let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(Date64Array(arrowData))) + return .success(ArrowArrayHolderImpl(try Date64Array(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -71,7 +71,7 @@ private func makeTimeHolder(_ field: ArrowField, if field.type.id == .time32 { if let arrowType = field.type as? ArrowTypeTime32 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -79,7 +79,7 @@ private func makeTimeHolder(_ field: ArrowField, if let arrowType = field.type as? 
ArrowTypeTime64 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -95,7 +95,7 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBool) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(BoolArray(arrowData))) + return .success(ArrowArrayHolderImpl(try BoolArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -109,7 +109,7 @@ private func makeFixedHolder( ) -> Result { do { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift index ed0cb1148e871..bfd7492064352 100644 --- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -212,6 +212,67 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length XCTAssertEqual(microArray[2], 987654321) } + func testStructArray() throws { // swiftlint:disable:this function_body_length + class StructTest { + var fieldBool: Bool = false + var fieldInt8: Int8 = 0 + var fieldInt16: Int16 = 0 + var fieldInt32: Int32 = 0 + var fieldInt64: Int64 = 0 + var fieldUInt8: UInt8 = 0 + var fieldUInt16: UInt16 = 0 + var fieldUInt32: UInt32 = 0 + var fieldUInt64: UInt64 = 0 + var fieldDouble: Double = 0 + var fieldFloat: Float = 0 + var fieldString: String = "" + var fieldData = Data() + var fieldDate: Date = Date.now + } + + enum STIndex: Int { + case bool, int8, int16, int32, int64 + case uint8, uint16, uint32, uint64, double + case float, string, data, date + } + + let testData = StructTest() + let dateNow = Date.now + let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType(testData) + structBuilder.append([true, Int8(1), Int16(2), Int32(3), Int64(4), + UInt8(5), UInt16(6), UInt32(7), UInt64(8), Double(9.9), + Float(10.10), "11", Data("12".utf8), dateNow]) + structBuilder.append(nil) + structBuilder.append([true, Int8(13), Int16(14), Int32(15), Int64(16), + UInt8(17), UInt16(18), UInt32(19), UInt64(20), Double(21.21), + Float(22.22), "23", Data("24".utf8), dateNow]) + XCTAssertEqual(structBuilder.length, 3) + let structArray = try structBuilder.finish() + XCTAssertEqual(structArray.length, 3) + XCTAssertNil(structArray[1]) + XCTAssertEqual(structArray.arrowFields![0].length, 3) + XCTAssertNil(structArray.arrowFields![0].array.asAny(1)) + XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true) + XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1) + XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2) + XCTAssertEqual(structArray[0]![STIndex.int32.rawValue] as? Int32, 3) + XCTAssertEqual(structArray[0]![STIndex.int64.rawValue] as? Int64, 4) + XCTAssertEqual(structArray[0]![STIndex.uint8.rawValue] as? UInt8, 5) + XCTAssertEqual(structArray[0]![STIndex.uint16.rawValue] as? UInt16, 6) + XCTAssertEqual(structArray[0]![STIndex.uint32.rawValue] as? UInt32, 7) + XCTAssertEqual(structArray[0]![STIndex.uint64.rawValue] as? 
UInt64, 8)
+        XCTAssertEqual(structArray[0]![STIndex.double.rawValue] as? Double, 9.9)
+        XCTAssertEqual(structArray[0]![STIndex.float.rawValue] as? Float, 10.10)
+        XCTAssertEqual(structArray[2]![STIndex.string.rawValue] as? String, "23")
+        XCTAssertEqual(
+            String(decoding: (structArray[0]![STIndex.data.rawValue] as? Data)!, as: UTF8.self), "12")
+        let dateFormatter = DateFormatter()
+        dateFormatter.timeStyle = .full
+        XCTAssertTrue(
+            dateFormatter.string(from: (structArray[0]![STIndex.date.rawValue] as? Date)!) ==
+            dateFormatter.string(from: dateNow))
+    }
+
     func checkHolderForType(_ checkType: ArrowType) throws {
         let buffers = [ArrowBuffer(length: 0, capacity: 0,
                                    rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)),

From bbfb7b964fd6e0a0675b444cf50a96e41177c314 Mon Sep 17 00:00:00 2001
From: James Henderson
Date: Fri, 19 Jul 2024 02:52:34 +0100
Subject: [PATCH 092/122] GH-43320: [Java] fix for SchemaChangeRuntimeException
 transferring empty FixedSizeListVector (#43321)

### What changes are included in this PR?

When we create a FSLV (FixedSizeListVector) through TransferImpl, we
check to see if the source's element vector is a ZeroVector and, if so,
we don't call addOrGetVector.

### Are these changes tested?

Yep - see TestFixedSizeListVector

### Are there any user-facing changes?

No

* GitHub Issue: #43320

Authored-by: James Henderson
Signed-off-by: David Li
---
 .../vector/complex/FixedSizeListVector.java   |  4 ++-
 .../arrow/vector/TestFixedSizeListVector.java | 29 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 7a88eaf162314..cb4550848088c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -602,7 +602,9 @@ public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) {

     public TransferImpl(FixedSizeListVector to) {
       this.to = to;
-      to.addOrGetVector(vector.getField().getFieldType());
+      if (!(vector instanceof ZeroVector)) {
+        to.addOrGetVector(vector.getField().getFieldType());
+      }
       dataPair = vector.makeTransferPair(to.vector);
     }

diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
index fc220e0f05a14..f582406de6808 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -26,6 +26,7 @@
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.arrow.memory.BufferAllocator;
@@ -243,6 +244,34 @@ public void testTransferPair() {
     }
   }

+  @Test
+  public void testTransferEmptyVector() throws Exception {
+    // #43320
+    try (FixedSizeListVector src =
+            new FixedSizeListVector(
+                "src", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null);
+        FixedSizeListVector dest =
+            new FixedSizeListVector(
+                "dest", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) {
+      src.makeTransferPair(dest).transfer();
+
+      IntVector els =
+          (IntVector) dest.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector();
+
+      dest.allocateNew();
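+      // Editorial note (regression sketch for GH-43320): before the
+      // TransferImpl fix above, the transfer pinned the destination's child
+      // to the empty source's ZeroVector field type, so the
+      // addOrGetVector(INT) call would fail with a
+      // SchemaChangeRuntimeException.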
+      dest.startNewValue(0);
+      els.setSafe(0, 1);
+      els.setSafe(1, 2);
+      dest.setValueCount(1);
+
+      List<Integer> expected = new ArrayList<>(2);
+      expected.add(1);
+      expected.add(2);
+
+      assertEquals(expected, dest.getObject(0));
+    }
+  }
+
   @Test
   public void testConsistentChildName() throws Exception {
     try (FixedSizeListVector listVector =

From 9fb11293678c3c23c9ed97630592dac768a2b4ac Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho
Date: Thu, 18 Jul 2024 23:18:07 -0300
Subject: [PATCH 093/122] GH-43258: [C++][Flight] Use a Base CRTP type for the
 types used in RPC calls (#43255)

### Rationale for this change

Flight should eventually be buildable without a specific protobuf
version. As such, all the protobuf types are wrapped in hand-written
classes. Uniformity of the interface is not enforced even though it's
desirable. Extending the interface requires adding functions to every
struct. A common base class can mitigate these risks and reduce the
amount of hand-written code.

### What changes are included in this PR?

 - Definition of a BaseType<> template (a CRTP)
 - Add constructors that aren't implicitly defined anymore
 - Having a default value for some fields that would otherwise be
   undefined
 - Leverage this structure to add `SerializeToBuffer` to all the types
 - ~30KiB in binary size reduction

### Are these changes tested?

By existing tests.

* GitHub Issue: #43258

Authored-by: Felipe Oliveira Carvalho
Signed-off-by: Felipe Oliveira Carvalho
---
 cpp/src/arrow/flight/client.cc                |  20 +-
 cpp/src/arrow/flight/flight_internals_test.cc |  41 +-
 cpp/src/arrow/flight/flight_test.cc           |   3 +-
 .../arrow/flight/serialization_internal.cc    |  45 +-
 cpp/src/arrow/flight/serialization_internal.h |   4 +-
 .../arrow/flight/sql/example/sqlite_server.cc |   4 +-
 cpp/src/arrow/flight/sql/server.cc            |  35 +-
 cpp/src/arrow/flight/test_util.cc             |  10 +-
 .../flight/transport/grpc/grpc_client.cc      |  12 +-
 .../arrow/flight/transport/ucx/ucx_server.cc  |   4 +-
 cpp/src/arrow/flight/types.cc                 | 212 ++++---
 cpp/src/arrow/flight/types.h                  | 569 +++++++++++-------
 12 files changed, 543 insertions(+), 416 deletions(-)

diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc
index 58a3ba4ab83e5..d0aee8ab9b3d2 100644
--- a/cpp/src/arrow/flight/client.cc
+++ b/cpp/src/arrow/flight/client.cc
@@ -584,8 +584,8 @@ arrow::Result<std::unique_ptr<ResultStream>> FlightClient::DoAction(

 arrow::Result<CancelFlightInfoResult> FlightClient::CancelFlightInfo(
     const FlightCallOptions& options, const CancelFlightInfoRequest& request) {
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kCancelFlightInfo.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kCancelFlightInfo.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
   ARROW_ASSIGN_OR_RAISE(auto cancel_result, CancelFlightInfoResult::Deserialize(
@@ -596,8 +596,8 @@ arrow::Result<CancelFlightInfoResult> FlightClient::CancelFlightInfo(

 arrow::Result<FlightEndpoint> FlightClient::RenewFlightEndpoint(
     const FlightCallOptions& options, const RenewFlightEndpointRequest& request) {
-  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString());
-  Action action{ActionType::kRenewFlightEndpoint.type, Buffer::FromString(body)};
+  ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer());
+  Action action{ActionType::kRenewFlightEndpoint.type, std::move(body)};
   ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action));
   ARROW_ASSIGN_OR_RAISE(auto result, stream->Next());
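   // Editorial note: each of these action wrappers follows the same shape:
   // serialize the request into an Action body, invoke DoAction, then
   // deserialize the first Result from the returned stream.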
ARROW_ASSIGN_OR_RAISE(auto renewed_endpoint, @@ -716,8 +716,8 @@ arrow::Result FlightClient::DoExchange( ::arrow::Result FlightClient::SetSessionOptions( const FlightCallOptions& options, const SetSessionOptionsRequest& request) { RETURN_NOT_OK(CheckOpen()); - ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString()); - Action action{ActionType::kSetSessionOptions.type, Buffer::FromString(body)}; + ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer()); + Action action{ActionType::kSetSessionOptions.type, std::move(body)}; ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action)); ARROW_ASSIGN_OR_RAISE(auto result, stream->Next()); ARROW_ASSIGN_OR_RAISE( @@ -730,8 +730,8 @@ ::arrow::Result FlightClient::SetSessionOptions( ::arrow::Result FlightClient::GetSessionOptions( const FlightCallOptions& options, const GetSessionOptionsRequest& request) { RETURN_NOT_OK(CheckOpen()); - ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString()); - Action action{ActionType::kGetSessionOptions.type, Buffer::FromString(body)}; + ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer()); + Action action{ActionType::kGetSessionOptions.type, std::move(body)}; ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action)); ARROW_ASSIGN_OR_RAISE(auto result, stream->Next()); ARROW_ASSIGN_OR_RAISE( @@ -744,8 +744,8 @@ ::arrow::Result FlightClient::GetSessionOptions( ::arrow::Result FlightClient::CloseSession( const FlightCallOptions& options, const CloseSessionRequest& request) { RETURN_NOT_OK(CheckOpen()); - ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToString()); - Action action{ActionType::kCloseSession.type, Buffer::FromString(body)}; + ARROW_ASSIGN_OR_RAISE(auto body, request.SerializeToBuffer()); + Action action{ActionType::kCloseSession.type, std::move(body)}; ARROW_ASSIGN_OR_RAISE(auto stream, DoAction(options, action)); ARROW_ASSIGN_OR_RAISE(auto result, stream->Next()); ARROW_ASSIGN_OR_RAISE(auto close_session_result, diff --git a/cpp/src/arrow/flight/flight_internals_test.cc b/cpp/src/arrow/flight/flight_internals_test.cc index 57f4f3e030420..caab357ef8f4a 100644 --- a/cpp/src/arrow/flight/flight_internals_test.cc +++ b/cpp/src/arrow/flight/flight_internals_test.cc @@ -79,8 +79,9 @@ void TestRoundtrip(const std::vector& values, ASSERT_OK(internal::ToProto(values[i], &pb_value)); if constexpr (std::is_same_v) { - ASSERT_OK_AND_ASSIGN(FlightInfo value, internal::FromProto(pb_value)); - EXPECT_EQ(values[i], value); + FlightInfo::Data info_data; + ASSERT_OK(internal::FromProto(pb_value, &info_data)); + EXPECT_EQ(values[i], FlightInfo{std::move(info_data)}); } else if constexpr (std::is_same_v) { std::string data; ASSERT_OK(internal::FromProto(pb_value, &data)); @@ -152,9 +153,11 @@ TEST(FlightTypes, BasicAuth) { } TEST(FlightTypes, Criteria) { - std::vector values = {{""}, {"criteria"}}; - std::vector reprs = {"", - ""}; + std::vector values = {Criteria{""}, Criteria{"criteria"}}; + std::vector reprs = { + "", + "", + }; ASSERT_NO_FATAL_FAILURE(TestRoundtrip(values, reprs)); } @@ -191,14 +194,14 @@ TEST(FlightTypes, FlightEndpoint) { Timestamp expiration_time( std::chrono::duration_cast(expiration_time_duration)); std::vector values = { - {{""}, {}, std::nullopt, {}}, - {{"foo"}, {}, std::nullopt, {}}, - {{"bar"}, {}, std::nullopt, {"\xDE\xAD\xBE\xEF"}}, - {{"foo"}, {}, expiration_time, {}}, - {{"foo"}, {location1}, std::nullopt, {}}, - {{"bar"}, {location1}, std::nullopt, {}}, - {{"foo"}, {location2}, std::nullopt, {}}, - {{"foo"}, {location1, location2}, std::nullopt, 
{"\xba\xdd\xca\xfe"}}, + {Ticket{""}, {}, std::nullopt, {}}, + {Ticket{"foo"}, {}, std::nullopt, {}}, + {Ticket{"bar"}, {}, std::nullopt, {"\xDE\xAD\xBE\xEF"}}, + {Ticket{"foo"}, {}, expiration_time, {}}, + {Ticket{"foo"}, {location1}, std::nullopt, {}}, + {Ticket{"bar"}, {location1}, std::nullopt, {}}, + {Ticket{"foo"}, {location2}, std::nullopt, {}}, + {Ticket{"foo"}, {location1, location2}, std::nullopt, {"\xba\xdd\xca\xfe"}}, }; std::vector reprs = { " locations=[] " @@ -299,9 +302,9 @@ TEST(FlightTypes, PollInfo) { TEST(FlightTypes, Result) { std::vector values = { - {Buffer::FromString("")}, - {Buffer::FromString("foo")}, - {Buffer::FromString("bar")}, + Result{Buffer::FromString("")}, + Result{Buffer::FromString("foo")}, + Result{Buffer::FromString("bar")}, }; std::vector reprs = { "", @@ -333,9 +336,9 @@ TEST(FlightTypes, SchemaResult) { TEST(FlightTypes, Ticket) { std::vector values = { - {""}, - {"foo"}, - {"bar"}, + Ticket{""}, + Ticket{"foo"}, + Ticket{"bar"}, }; std::vector reprs = { "", diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index e179f3406d65e..101bb06b21288 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -998,7 +998,8 @@ TEST_F(TestFlightClient, ListFlights) { } TEST_F(TestFlightClient, ListFlightsWithCriteria) { - ASSERT_OK_AND_ASSIGN(auto listing, client_->ListFlights(FlightCallOptions(), {"foo"})); + ASSERT_OK_AND_ASSIGN(auto listing, + client_->ListFlights(FlightCallOptions{}, Criteria{"foo"})); std::unique_ptr info; ASSERT_OK_AND_ASSIGN(info, listing->Next()); ASSERT_TRUE(info == nullptr); diff --git a/cpp/src/arrow/flight/serialization_internal.cc b/cpp/src/arrow/flight/serialization_internal.cc index 10600d055b3a8..fedfc7d5cd590 100644 --- a/cpp/src/arrow/flight/serialization_internal.cc +++ b/cpp/src/arrow/flight/serialization_internal.cc @@ -251,22 +251,28 @@ Status ToProto(const FlightDescriptor& descriptor, pb::FlightDescriptor* pb_desc // FlightInfo -arrow::Result FromProto(const pb::FlightInfo& pb_info) { - FlightInfo::Data info; - RETURN_NOT_OK(FromProto(pb_info.flight_descriptor(), &info.descriptor)); +Status FromProto(const pb::FlightInfo& pb_info, FlightInfo::Data* info) { + RETURN_NOT_OK(FromProto(pb_info.flight_descriptor(), &info->descriptor)); - info.schema = pb_info.schema(); + info->schema = pb_info.schema(); - info.endpoints.resize(pb_info.endpoint_size()); + info->endpoints.resize(pb_info.endpoint_size()); for (int i = 0; i < pb_info.endpoint_size(); ++i) { - RETURN_NOT_OK(FromProto(pb_info.endpoint(i), &info.endpoints[i])); + RETURN_NOT_OK(FromProto(pb_info.endpoint(i), &info->endpoints[i])); } - info.total_records = pb_info.total_records(); - info.total_bytes = pb_info.total_bytes(); - info.ordered = pb_info.ordered(); - info.app_metadata = pb_info.app_metadata(); - return FlightInfo(std::move(info)); + info->total_records = pb_info.total_records(); + info->total_bytes = pb_info.total_bytes(); + info->ordered = pb_info.ordered(); + info->app_metadata = pb_info.app_metadata(); + return Status::OK(); +} + +Status FromProto(const pb::FlightInfo& pb_info, std::unique_ptr* info) { + FlightInfo::Data info_data; + RETURN_NOT_OK(FromProto(pb_info, &info_data)); + *info = std::make_unique(std::move(info_data)); + return Status::OK(); } Status FromProto(const pb::BasicAuth& pb_basic_auth, BasicAuth* basic_auth) { @@ -315,8 +321,9 @@ Status ToProto(const FlightInfo& info, pb::FlightInfo* pb_info) { Status FromProto(const pb::PollInfo& pb_info, PollInfo* info) { if 
(pb_info.has_info()) { - ARROW_ASSIGN_OR_RAISE(auto flight_info, FromProto(pb_info.info())); - info->info = std::make_unique(std::move(flight_info)); + FlightInfo::Data info_data; + RETURN_NOT_OK(FromProto(pb_info.info(), &info_data)); + info->info = std::make_unique(std::move(info_data)); } if (pb_info.has_flight_descriptor()) { FlightDescriptor descriptor; @@ -340,6 +347,13 @@ Status FromProto(const pb::PollInfo& pb_info, PollInfo* info) { return Status::OK(); } +Status FromProto(const pb::PollInfo& pb_info, std::unique_ptr* info) { + PollInfo poll_info; + RETURN_NOT_OK(FromProto(pb_info, &poll_info)); + *info = std::make_unique(std::move(poll_info)); + return Status::OK(); +} + Status ToProto(const PollInfo& info, pb::PollInfo* pb_info) { if (info.info) { RETURN_NOT_OK(ToProto(*info.info, pb_info->mutable_info())); @@ -360,8 +374,9 @@ Status ToProto(const PollInfo& info, pb::PollInfo* pb_info) { Status FromProto(const pb::CancelFlightInfoRequest& pb_request, CancelFlightInfoRequest* request) { - ARROW_ASSIGN_OR_RAISE(FlightInfo info, FromProto(pb_request.info())); - request->info = std::make_unique(std::move(info)); + FlightInfo::Data info_data; + RETURN_NOT_OK(FromProto(pb_request.info(), &info_data)); + request->info = std::make_unique(std::move(info_data)); return Status::OK(); } diff --git a/cpp/src/arrow/flight/serialization_internal.h b/cpp/src/arrow/flight/serialization_internal.h index 90dde87d3a5eb..ffde47d43c00e 100644 --- a/cpp/src/arrow/flight/serialization_internal.h +++ b/cpp/src/arrow/flight/serialization_internal.h @@ -60,8 +60,10 @@ Status FromProto(const pb::FlightDescriptor& pb_descr, FlightDescriptor* descr); Status FromProto(const pb::FlightEndpoint& pb_endpoint, FlightEndpoint* endpoint); Status FromProto(const pb::RenewFlightEndpointRequest& pb_request, RenewFlightEndpointRequest* request); -arrow::Result FromProto(const pb::FlightInfo& pb_info); +Status FromProto(const pb::FlightInfo& pb_info, FlightInfo::Data* info); +Status FromProto(const pb::FlightInfo& pb_info, std::unique_ptr* info); Status FromProto(const pb::PollInfo& pb_info, PollInfo* info); +Status FromProto(const pb::PollInfo& pb_info, std::unique_ptr* info); Status FromProto(const pb::CancelFlightInfoRequest& pb_request, CancelFlightInfoRequest* request); Status FromProto(const pb::SchemaResult& pb_result, std::string* result); diff --git a/cpp/src/arrow/flight/sql/example/sqlite_server.cc b/cpp/src/arrow/flight/sql/example/sqlite_server.cc index 20b234e90ad3b..0651e6111c25d 100644 --- a/cpp/src/arrow/flight/sql/example/sqlite_server.cc +++ b/cpp/src/arrow/flight/sql/example/sqlite_server.cc @@ -126,7 +126,7 @@ arrow::Result> DoGetSQLiteQuery( arrow::Result> GetFlightInfoForCommand( const FlightDescriptor& descriptor, const std::shared_ptr& schema) { std::vector endpoints{ - FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, ""}}; + FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, ""}}; ARROW_ASSIGN_OR_RAISE(auto result, FlightInfo::Make(*schema, descriptor, endpoints, -1, -1, false)) @@ -389,7 +389,7 @@ class SQLiteFlightSqlServer::Impl { const ServerCallContext& context, const GetTables& command, const FlightDescriptor& descriptor) { std::vector endpoints{ - FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, ""}}; + FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, ""}}; bool include_schema = command.include_schema; ARROW_LOG(INFO) << "GetTables include_schema=" << include_schema; diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index 
63d1f5c5225fa..ac89976690877 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -477,13 +477,11 @@ arrow::Result PackActionResult(ActionBeginTransactionResult result) { } arrow::Result PackActionResult(CancelFlightInfoResult result) { - ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString()); - return Result{Buffer::FromString(std::move(serialized))}; + return result.SerializeToBuffer(); } arrow::Result PackActionResult(const FlightEndpoint& endpoint) { - ARROW_ASSIGN_OR_RAISE(auto serialized, endpoint.SerializeToString()); - return Result{Buffer::FromString(std::move(serialized))}; + return endpoint.SerializeToBuffer(); } arrow::Result PackActionResult(CancelResult result) { @@ -525,21 +523,6 @@ arrow::Result PackActionResult(ActionCreatePreparedStatementResult resul return PackActionResult(pb_result); } -arrow::Result PackActionResult(SetSessionOptionsResult result) { - ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString()); - return Result{Buffer::FromString(std::move(serialized))}; -} - -arrow::Result PackActionResult(GetSessionOptionsResult result) { - ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString()); - return Result{Buffer::FromString(std::move(serialized))}; -} - -arrow::Result PackActionResult(CloseSessionResult result) { - ARROW_ASSIGN_OR_RAISE(auto serialized, result.SerializeToString()); - return Result{Buffer::FromString(std::move(serialized))}; -} - } // namespace arrow::Result StatementQueryTicket::Deserialize( @@ -908,23 +891,23 @@ Status FlightSqlServerBase::DoAction(const ServerCallContext& context, std::string_view body(*action.body); ARROW_ASSIGN_OR_RAISE(auto request, SetSessionOptionsRequest::Deserialize(body)); ARROW_ASSIGN_OR_RAISE(auto result, SetSessionOptions(context, request)); - ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result))); + ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer()); - results.push_back(std::move(packed_result)); + results.emplace_back(std::move(packed_result)); } else if (action.type == ActionType::kGetSessionOptions.type) { std::string_view body(*action.body); ARROW_ASSIGN_OR_RAISE(auto request, GetSessionOptionsRequest::Deserialize(body)); ARROW_ASSIGN_OR_RAISE(auto result, GetSessionOptions(context, request)); - ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result))); + ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer()); - results.push_back(std::move(packed_result)); + results.emplace_back(std::move(packed_result)); } else if (action.type == ActionType::kCloseSession.type) { std::string_view body(*action.body); ARROW_ASSIGN_OR_RAISE(auto request, CloseSessionRequest::Deserialize(body)); ARROW_ASSIGN_OR_RAISE(auto result, CloseSession(context, request)); - ARROW_ASSIGN_OR_RAISE(auto packed_result, PackActionResult(std::move(result))); + ARROW_ASSIGN_OR_RAISE(auto packed_result, result.SerializeToBuffer()); - results.push_back(std::move(packed_result)); + results.emplace_back(std::move(packed_result)); } else { google::protobuf::Any any; if (!any.ParseFromArray(action.body->data(), static_cast(action.body->size()))) { @@ -1063,7 +1046,7 @@ arrow::Result> FlightSqlServerBase::GetFlightInfoSql } std::vector endpoints{ - FlightEndpoint{{descriptor.cmd}, {}, std::nullopt, {}}}; + FlightEndpoint{Ticket{descriptor.cmd}, {}, std::nullopt, {}}}; ARROW_ASSIGN_OR_RAISE( auto result, FlightInfo::Make(*SqlSchema::GetSqlInfoSchema(), descriptor, endpoints, -1, -1, false)) diff --git 
a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc index bf2f4c2b4effc..8b4245e74e843 100644 --- a/cpp/src/arrow/flight/test_util.cc +++ b/cpp/src/arrow/flight/test_util.cc @@ -604,11 +604,11 @@ std::vector ExampleFlightInfo() { Location location4 = *Location::ForGrpcTcp("foo4.bar.com", 12345); Location location5 = *Location::ForGrpcTcp("foo5.bar.com", 12345); - FlightEndpoint endpoint1({{"ticket-ints-1"}, {location1}, std::nullopt, {}}); - FlightEndpoint endpoint2({{"ticket-ints-2"}, {location2}, std::nullopt, {}}); - FlightEndpoint endpoint3({{"ticket-cmd"}, {location3}, std::nullopt, {}}); - FlightEndpoint endpoint4({{"ticket-dicts-1"}, {location4}, std::nullopt, {}}); - FlightEndpoint endpoint5({{"ticket-floats-1"}, {location5}, std::nullopt, {}}); + FlightEndpoint endpoint1({Ticket{"ticket-ints-1"}, {location1}, std::nullopt, {}}); + FlightEndpoint endpoint2({Ticket{"ticket-ints-2"}, {location2}, std::nullopt, {}}); + FlightEndpoint endpoint3({Ticket{"ticket-cmd"}, {location3}, std::nullopt, {}}); + FlightEndpoint endpoint4({Ticket{"ticket-dicts-1"}, {location4}, std::nullopt, {}}); + FlightEndpoint endpoint5({Ticket{"ticket-floats-1"}, {location5}, std::nullopt, {}}); FlightDescriptor descr1{FlightDescriptor::PATH, "", {"examples", "ints"}}; FlightDescriptor descr2{FlightDescriptor::CMD, "my_command", {}}; diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc index f799ba761c40d..6d8d40c2ebcf8 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc @@ -648,10 +648,10 @@ class UnaryUnaryAsyncCall : public ::grpc::ClientUnaryReactor, public internal:: void OnDone(const ::grpc::Status& status) override { if (status.ok()) { - auto result = internal::FromProto(pb_response); - client_status = result.status(); + FlightInfo::Data info_data; + client_status = internal::FromProto(pb_response, &info_data); if (client_status.ok()) { - listener->OnNext(std::move(result).MoveValueUnsafe()); + listener->OnNext(FlightInfo{std::move(info_data)}); } } Finish(status); @@ -889,7 +889,8 @@ class GrpcClientImpl : public internal::ClientTransport { pb::FlightInfo pb_info; while (!options.stop_token.IsStopRequested() && stream->Read(&pb_info)) { - ARROW_ASSIGN_OR_RAISE(FlightInfo info_data, internal::FromProto(pb_info)); + FlightInfo::Data info_data; + RETURN_NOT_OK(internal::FromProto(pb_info, &info_data)); flights.emplace_back(std::move(info_data)); } if (options.stop_token.IsStopRequested()) rpc.context.TryCancel(); @@ -939,7 +940,8 @@ class GrpcClientImpl : public internal::ClientTransport { stub_->GetFlightInfo(&rpc.context, pb_descriptor, &pb_response), &rpc.context); RETURN_NOT_OK(s); - ARROW_ASSIGN_OR_RAISE(auto info_data, internal::FromProto(pb_response)); + FlightInfo::Data info_data; + RETURN_NOT_OK(internal::FromProto(pb_response, &info_data)); *info = std::make_unique(std::move(info_data)); return Status::OK(); } diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc index cb9c8948ccf1e..55ff138348812 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc +++ b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc @@ -376,7 +376,7 @@ class UcxServerImpl : public arrow::flight::internal::ServerTransport { std::unique_ptr info; std::string response; SERVER_RETURN_NOT_OK(driver, base_->GetFlightInfo(context, descriptor, &info)); - SERVER_RETURN_NOT_OK(driver, 
info->SerializeToString().Value(&response)); + SERVER_RETURN_NOT_OK(driver, info->DoSerializeToString(&response)); RETURN_NOT_OK(driver->SendFrame(FrameType::kBuffer, reinterpret_cast(response.data()), static_cast(response.size()))); @@ -397,7 +397,7 @@ class UcxServerImpl : public arrow::flight::internal::ServerTransport { std::unique_ptr info; std::string response; SERVER_RETURN_NOT_OK(driver, base_->PollFlightInfo(context, descriptor, &info)); - SERVER_RETURN_NOT_OK(driver, info->SerializeToString().Value(&response)); + SERVER_RETURN_NOT_OK(driver, info->DoSerializeToString(&response)); RETURN_NOT_OK(driver->SendFrame(FrameType::kBuffer, reinterpret_cast(response.data()), static_cast(response.size()))); diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index a04956a4ea3f7..170fbfe2393f9 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -81,20 +81,17 @@ Status SerializeToString(const char* name, const T& in, PBType* out_pb, // Result-returning ser/de functions (more convenient) template -arrow::Result DeserializeProtoString(const char* name, std::string_view serialized) { +arrow::Status DeserializeProtoString(const char* name, std::string_view serialized, + T* out) { PBType pb; RETURN_NOT_OK(ParseFromString(name, serialized, &pb)); - T out; - RETURN_NOT_OK(internal::FromProto(pb, &out)); - return out; + return internal::FromProto(pb, out); } template -arrow::Result SerializeToProtoString(const char* name, const T& in) { +Status SerializeToProtoString(const char* name, const T& in, std::string* out) { PBType pb; - std::string out; - RETURN_NOT_OK(SerializeToString(name, in, &pb, &out)); - return out; + return SerializeToString(name, in, &pb, out); } } // namespace @@ -230,24 +227,25 @@ bool SchemaResult::Equals(const SchemaResult& other) const { return raw_schema_ == other.raw_schema_; } -arrow::Result SchemaResult::SerializeToString() const { - return SerializeToProtoString("SchemaResult", *this); +arrow::Status SchemaResult::SerializeToString(std::string* out) const { + return SerializeToProtoString("SchemaResult", *this, out); } -arrow::Result SchemaResult::Deserialize(std::string_view serialized) { +arrow::Status SchemaResult::Deserialize(std::string_view serialized, SchemaResult* out) { pb::SchemaResult pb_schema_result; RETURN_NOT_OK(ParseFromString("SchemaResult", serialized, &pb_schema_result)); - return SchemaResult{pb_schema_result.schema()}; + *out = SchemaResult{pb_schema_result.schema()}; + return Status::OK(); } -arrow::Result FlightDescriptor::SerializeToString() const { - return SerializeToProtoString("FlightDescriptor", *this); +arrow::Status FlightDescriptor::SerializeToString(std::string* out) const { + return SerializeToProtoString("FlightDescriptor", *this, out); } -arrow::Result FlightDescriptor::Deserialize( - std::string_view serialized) { +arrow::Status FlightDescriptor::Deserialize(std::string_view serialized, + FlightDescriptor* out) { return DeserializeProtoString( - "FlightDescriptor", serialized); + "FlightDescriptor", serialized, out); } std::string Ticket::ToString() const { @@ -258,12 +256,12 @@ std::string Ticket::ToString() const { bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; } -arrow::Result Ticket::SerializeToString() const { - return SerializeToProtoString("Ticket", *this); +arrow::Status Ticket::SerializeToString(std::string* out) const { + return SerializeToProtoString("Ticket", *this, out); } -arrow::Result Ticket::Deserialize(std::string_view serialized) { 
- return DeserializeProtoString("Ticket", serialized); +arrow::Status Ticket::Deserialize(std::string_view serialized, Ticket* out) { + return DeserializeProtoString("Ticket", serialized, out); } arrow::Result FlightInfo::Make(const Schema& schema, @@ -279,7 +277,7 @@ arrow::Result FlightInfo::Make(const Schema& schema, data.ordered = ordered; data.app_metadata = std::move(app_metadata); RETURN_NOT_OK(internal::SchemaToString(schema, &data.schema)); - return FlightInfo(data); + return FlightInfo(std::move(data)); } arrow::Result> FlightInfo::GetSchema( @@ -294,16 +292,14 @@ arrow::Result> FlightInfo::GetSchema( return schema_; } -arrow::Result FlightInfo::SerializeToString() const { - return SerializeToProtoString("FlightInfo", *this); +arrow::Status FlightInfo::SerializeToString(std::string* out) const { + return SerializeToProtoString("FlightInfo", *this, out); } -arrow::Result> FlightInfo::Deserialize( - std::string_view serialized) { - pb::FlightInfo pb_info; - RETURN_NOT_OK(ParseFromString("FlightInfo", serialized, &pb_info)); - ARROW_ASSIGN_OR_RAISE(FlightInfo info, internal::FromProto(pb_info)); - return std::make_unique(std::move(info)); +arrow::Status FlightInfo::Deserialize(std::string_view serialized, + std::unique_ptr* out) { + return DeserializeProtoString>( + "FlightInfo", serialized, out); } std::string FlightInfo::ToString() const { @@ -340,17 +336,14 @@ bool FlightInfo::Equals(const FlightInfo& other) const { data_.app_metadata == other.data_.app_metadata; } -arrow::Result PollInfo::SerializeToString() const { - return SerializeToProtoString("PollInfo", *this); +arrow::Status PollInfo::SerializeToString(std::string* out) const { + return SerializeToProtoString("PollInfo", *this, out); } -arrow::Result> PollInfo::Deserialize( - std::string_view serialized) { - pb::PollInfo pb_info; - RETURN_NOT_OK(ParseFromString("PollInfo", serialized, &pb_info)); - PollInfo info; - RETURN_NOT_OK(internal::FromProto(pb_info, &info)); - return std::make_unique(std::move(info)); +arrow::Status PollInfo::Deserialize(std::string_view serialized, + std::unique_ptr* out) { + return DeserializeProtoString>("PollInfo", + serialized, out); } std::string PollInfo::ToString() const { @@ -427,15 +420,15 @@ bool CancelFlightInfoRequest::Equals(const CancelFlightInfoRequest& other) const return info == other.info; } -arrow::Result CancelFlightInfoRequest::SerializeToString() const { +arrow::Status CancelFlightInfoRequest::SerializeToString(std::string* out) const { return SerializeToProtoString("CancelFlightInfoRequest", - *this); + *this, out); } -arrow::Result CancelFlightInfoRequest::Deserialize( - std::string_view serialized) { +arrow::Status CancelFlightInfoRequest::Deserialize(std::string_view serialized, + CancelFlightInfoRequest* out) { return DeserializeProtoString( - "CancelFlightInfoRequest", serialized); + "CancelFlightInfoRequest", serialized, out); } static const char* const SetSessionOptionStatusNames[] = {"Unspecified", "InvalidName", @@ -556,15 +549,15 @@ bool SetSessionOptionsRequest::Equals(const SetSessionOptionsRequest& other) con return CompareSessionOptionMaps(session_options, other.session_options); } -arrow::Result SetSessionOptionsRequest::SerializeToString() const { +arrow::Status SetSessionOptionsRequest::SerializeToString(std::string* out) const { return SerializeToProtoString("SetSessionOptionsRequest", - *this); + *this, out); } -arrow::Result SetSessionOptionsRequest::Deserialize( - std::string_view serialized) { +arrow::Status 
SetSessionOptionsRequest::Deserialize(std::string_view serialized, + SetSessionOptionsRequest* out) { return DeserializeProtoString( - "SetSessionOptionsRequest", serialized); + "SetSessionOptionsRequest", serialized, out); } // SetSessionOptionsResult @@ -584,15 +577,15 @@ bool SetSessionOptionsResult::Equals(const SetSessionOptionsResult& other) const return true; } -arrow::Result SetSessionOptionsResult::SerializeToString() const { +arrow::Status SetSessionOptionsResult::SerializeToString(std::string* out) const { return SerializeToProtoString("SetSessionOptionsResult", - *this); + *this, out); } -arrow::Result SetSessionOptionsResult::Deserialize( - std::string_view serialized) { +arrow::Status SetSessionOptionsResult::Deserialize(std::string_view serialized, + SetSessionOptionsResult* out) { return DeserializeProtoString( - "SetSessionOptionsResult", serialized); + "SetSessionOptionsResult", serialized, out); } // GetSessionOptionsRequest @@ -605,15 +598,15 @@ bool GetSessionOptionsRequest::Equals(const GetSessionOptionsRequest& other) con return true; } -arrow::Result GetSessionOptionsRequest::SerializeToString() const { +arrow::Status GetSessionOptionsRequest::SerializeToString(std::string* out) const { return SerializeToProtoString("GetSessionOptionsRequest", - *this); + *this, out); } -arrow::Result GetSessionOptionsRequest::Deserialize( - std::string_view serialized) { +arrow::Status GetSessionOptionsRequest::Deserialize(std::string_view serialized, + GetSessionOptionsRequest* out) { return DeserializeProtoString( - "GetSessionOptionsRequest", serialized); + "GetSessionOptionsRequest", serialized, out); } // GetSessionOptionsResult @@ -628,15 +621,15 @@ bool GetSessionOptionsResult::Equals(const GetSessionOptionsResult& other) const return CompareSessionOptionMaps(session_options, other.session_options); } -arrow::Result GetSessionOptionsResult::SerializeToString() const { +arrow::Status GetSessionOptionsResult::SerializeToString(std::string* out) const { return SerializeToProtoString("GetSessionOptionsResult", - *this); + *this, out); } -arrow::Result GetSessionOptionsResult::Deserialize( - std::string_view serialized) { +arrow::Status GetSessionOptionsResult::Deserialize(std::string_view serialized, + GetSessionOptionsResult* out) { return DeserializeProtoString( - "GetSessionOptionsResult", serialized); + "GetSessionOptionsResult", serialized, out); } // CloseSessionRequest @@ -645,14 +638,15 @@ std::string CloseSessionRequest::ToString() const { return " CloseSessionRequest::SerializeToString() const { - return SerializeToProtoString("CloseSessionRequest", *this); +arrow::Status CloseSessionRequest::SerializeToString(std::string* out) const { + return SerializeToProtoString("CloseSessionRequest", *this, + out); } -arrow::Result CloseSessionRequest::Deserialize( - std::string_view serialized) { +arrow::Status CloseSessionRequest::Deserialize(std::string_view serialized, + CloseSessionRequest* out) { return DeserializeProtoString( - "CloseSessionRequest", serialized); + "CloseSessionRequest", serialized, out); } // CloseSessionResult @@ -669,14 +663,14 @@ bool CloseSessionResult::Equals(const CloseSessionResult& other) const { return status == other.status; } -arrow::Result CloseSessionResult::SerializeToString() const { - return SerializeToProtoString("CloseSessionResult", *this); +arrow::Status CloseSessionResult::SerializeToString(std::string* out) const { + return SerializeToProtoString("CloseSessionResult", *this, out); } -arrow::Result CloseSessionResult::Deserialize( - 
std::string_view serialized) { +arrow::Status CloseSessionResult::Deserialize(std::string_view serialized, + CloseSessionResult* out) { return DeserializeProtoString( - "CloseSessionResult", serialized); + "CloseSessionResult", serialized, out); } Location::Location() { uri_ = std::make_shared(); } @@ -781,13 +775,14 @@ bool FlightEndpoint::Equals(const FlightEndpoint& other) const { return true; } -arrow::Result FlightEndpoint::SerializeToString() const { - return SerializeToProtoString("FlightEndpoint", *this); +arrow::Status FlightEndpoint::SerializeToString(std::string* out) const { + return SerializeToProtoString("FlightEndpoint", *this, out); } -arrow::Result FlightEndpoint::Deserialize(std::string_view serialized) { +arrow::Status FlightEndpoint::Deserialize(std::string_view serialized, + FlightEndpoint* out) { return DeserializeProtoString("FlightEndpoint", - serialized); + serialized, out); } std::string RenewFlightEndpointRequest::ToString() const { @@ -800,16 +795,16 @@ bool RenewFlightEndpointRequest::Equals(const RenewFlightEndpointRequest& other) return endpoint == other.endpoint; } -arrow::Result RenewFlightEndpointRequest::SerializeToString() const { +arrow::Status RenewFlightEndpointRequest::SerializeToString(std::string* out) const { return SerializeToProtoString( - "RenewFlightEndpointRequest", *this); + "RenewFlightEndpointRequest", *this, out); } -arrow::Result RenewFlightEndpointRequest::Deserialize( - std::string_view serialized) { +arrow::Status RenewFlightEndpointRequest::Deserialize(std::string_view serialized, + RenewFlightEndpointRequest* out) { return DeserializeProtoString("RenewFlightEndpointRequest", - serialized); + serialized, out); } std::string ActionType::ToString() const { @@ -847,12 +842,13 @@ bool ActionType::Equals(const ActionType& other) const { return type == other.type && description == other.description; } -arrow::Result ActionType::SerializeToString() const { - return SerializeToProtoString("ActionType", *this); +arrow::Status ActionType::SerializeToString(std::string* out) const { + return SerializeToProtoString("ActionType", *this, out); } -arrow::Result ActionType::Deserialize(std::string_view serialized) { - return DeserializeProtoString("ActionType", serialized); +arrow::Status ActionType::Deserialize(std::string_view serialized, ActionType* out) { + return DeserializeProtoString("ActionType", serialized, + out); } std::string Criteria::ToString() const { @@ -863,12 +859,12 @@ bool Criteria::Equals(const Criteria& other) const { return expression == other.expression; } -arrow::Result Criteria::SerializeToString() const { - return SerializeToProtoString("Criteria", *this); +arrow::Status Criteria::SerializeToString(std::string* out) const { + return SerializeToProtoString("Criteria", *this, out); } -arrow::Result Criteria::Deserialize(std::string_view serialized) { - return DeserializeProtoString("Criteria", serialized); +arrow::Status Criteria::Deserialize(std::string_view serialized, Criteria* out) { + return DeserializeProtoString("Criteria", serialized, out); } std::string Action::ToString() const { @@ -889,12 +885,12 @@ bool Action::Equals(const Action& other) const { ((body == other.body) || (body && other.body && body->Equals(*other.body))); } -arrow::Result Action::SerializeToString() const { - return SerializeToProtoString("Action", *this); +arrow::Status Action::SerializeToString(std::string* out) const { + return SerializeToProtoString("Action", *this, out); } -arrow::Result Action::Deserialize(std::string_view serialized) { - 
return DeserializeProtoString<Action, pb::Action>("Action", serialized);
+arrow::Status Action::Deserialize(std::string_view serialized, Action* out) {
+  return DeserializeProtoString<Action, pb::Action>("Action", serialized, out);
 }
 
 std::string Result::ToString() const {
@@ -912,12 +908,12 @@ bool Result::Equals(const Result& other) const {
   return (body == other.body) || (body && other.body && body->Equals(*other.body));
 }
 
-arrow::Result<std::string> Result::SerializeToString() const {
-  return SerializeToProtoString<pb::Result>("Result", *this);
+arrow::Status Result::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::Result>("Result", *this, out);
 }
 
-arrow::Result<Result> Result::Deserialize(std::string_view serialized) {
-  return DeserializeProtoString<Result, pb::Result>("Result", serialized);
+arrow::Status Result::Deserialize(std::string_view serialized, Result* out) {
+  return DeserializeProtoString<Result, pb::Result>("Result", serialized, out);
 }
 
 std::string CancelFlightInfoResult::ToString() const {
@@ -930,15 +926,15 @@ bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const {
   return status == other.status;
 }
 
-arrow::Result<std::string> CancelFlightInfoResult::SerializeToString() const {
+arrow::Status CancelFlightInfoResult::SerializeToString(std::string* out) const {
   return SerializeToProtoString<pb::CancelFlightInfoResult>("CancelFlightInfoResult",
-                                                            *this);
+                                                            *this, out);
 }
 
-arrow::Result<CancelFlightInfoResult> CancelFlightInfoResult::Deserialize(
-    std::string_view serialized) {
+arrow::Status CancelFlightInfoResult::Deserialize(std::string_view serialized,
+                                                  CancelFlightInfoResult* out) {
   return DeserializeProtoString<CancelFlightInfoResult, pb::CancelFlightInfoResult>(
-      "CancelFlightInfoResult", serialized);
+      "CancelFlightInfoResult", serialized, out);
 }
 
 std::ostream& operator<<(std::ostream& os, CancelStatus status) {
@@ -1055,12 +1051,12 @@ bool BasicAuth::Equals(const BasicAuth& other) const {
   return (username == other.username) && (password == other.password);
 }
 
-arrow::Result<BasicAuth> BasicAuth::Deserialize(std::string_view serialized) {
-  return DeserializeProtoString<BasicAuth, pb::BasicAuth>("BasicAuth", serialized);
+arrow::Status BasicAuth::Deserialize(std::string_view serialized, BasicAuth* out) {
+  return DeserializeProtoString<BasicAuth, pb::BasicAuth>("BasicAuth", serialized, out);
 }
 
-arrow::Result<std::string> BasicAuth::SerializeToString() const {
-  return SerializeToProtoString<pb::BasicAuth>("BasicAuth", *this);
+arrow::Status BasicAuth::SerializeToString(std::string* out) const {
+  return SerializeToProtoString<pb::BasicAuth>("BasicAuth", *this, out);
 }
 
 //------------------------------------------------------------
diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h
index cdf03f21041ee..8061df4ddeb84 100644
--- a/cpp/src/arrow/flight/types.h
+++ b/cpp/src/arrow/flight/types.h
@@ -31,6 +31,7 @@
 #include
 #include
+#include "arrow/buffer.h"
 #include "arrow/flight/type_fwd.h"
 #include "arrow/flight/visibility.h"
 #include "arrow/ipc/options.h"
@@ -159,29 +160,88 @@ struct ARROW_FLIGHT_EXPORT CertKeyPair {
   std::string pem_key;
 };
 
+namespace internal {
+
+template <typename T>
+struct remove_unique_ptr {
+  using type = T;
+};
+
+template <typename T>
+struct remove_unique_ptr<std::unique_ptr<T>> {
+  using type = T;
+};
+
+// Base CRTP type
+template <typename T>
+struct BaseType {
+ protected:
+  using SuperT = BaseType<T>;
+  using SelfT = typename remove_unique_ptr<T>::type;
+
+  const SelfT& self() const { return static_cast<const SelfT&>(*this); }
+  SelfT& self() { return static_cast<SelfT&>(*this); }
+
+ public:
+  BaseType() = default;
+
+  friend bool operator==(const SelfT& left, const SelfT& right) {
+    return left.Equals(right);
+  }
+  friend bool operator!=(const SelfT& left, const SelfT& right) {
+    return !left.Equals(right);
+  }
+
+  /// \brief Serialize this message to its wire-format representation.
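+  ///
+  /// (Editorial note: this Result-returning convenience overload forwards
+  /// to the subclass's Status-returning SerializeToString(std::string*).)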
+  inline arrow::Result<std::string> SerializeToString() const {
+    std::string out;
+    ARROW_RETURN_NOT_OK(self().SelfT::SerializeToString(&out));
+    return out;
+  }
+
+  inline static arrow::Result<T> Deserialize(std::string_view serialized) {
+    T out;
+    ARROW_RETURN_NOT_OK(SelfT::Deserialize(serialized, &out));
+    return out;
+  }
+
+  inline arrow::Result<std::shared_ptr<Buffer>> SerializeToBuffer() const {
+    std::string out;
+    ARROW_RETURN_NOT_OK(self().SelfT::SerializeToString(&out));
+    return Buffer::FromString(std::move(out));
+  }
+};
+
+}  // namespace internal
+
 /// \brief A type of action that can be performed with the DoAction RPC.
-struct ARROW_FLIGHT_EXPORT ActionType {
+struct ARROW_FLIGHT_EXPORT ActionType : public internal::BaseType<ActionType> {
   /// \brief The name of the action.
   std::string type;
 
   /// \brief A human-readable description of the action.
   std::string description;
 
+  ActionType() = default;
+
+  ActionType(std::string type, std::string description)
+      : type(std::move(type)), description(std::move(description)) {}
+
   std::string ToString() const;
   bool Equals(const ActionType& other) const;
 
-  friend bool operator==(const ActionType& left, const ActionType& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const ActionType& left, const ActionType& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;
 
   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;
 
   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<ActionType> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, ActionType* out);
 
   static const ActionType kCancelFlightInfo;
   static const ActionType kRenewFlightEndpoint;
@@ -191,71 +251,83 @@ struct ARROW_FLIGHT_EXPORT ActionType {
 };
 
 /// \brief Opaque selection criteria for ListFlights RPC
-struct ARROW_FLIGHT_EXPORT Criteria {
+struct ARROW_FLIGHT_EXPORT Criteria : public internal::BaseType<Criteria> {
   /// Opaque criteria expression, dependent on server implementation
   std::string expression;
 
+  Criteria() = default;
+  Criteria(std::string expression)  // NOLINT runtime/explicit
+      : expression(std::move(expression)) {}
+
   std::string ToString() const;
   bool Equals(const Criteria& other) const;
 
-  friend bool operator==(const Criteria& left, const Criteria& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const Criteria& left, const Criteria& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;
 
   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;
 
   /// \brief Deserialize this message from its wire-format representation.
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
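+  ///
+  /// (Editorial note: the Result-returning overload is brought into scope
+  /// by the `using SuperT::Deserialize;` declaration above.)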
+ static arrow::Status Deserialize(std::string_view serialized, Criteria* out); }; /// \brief An action to perform with the DoAction RPC -struct ARROW_FLIGHT_EXPORT Action { +struct ARROW_FLIGHT_EXPORT Action : public internal::BaseType { /// The action type std::string type; /// The action content as a Buffer std::shared_ptr body; + Action() = default; + Action(std::string type, std::shared_ptr body) + : type(std::move(type)), body(std::move(body)) {} + std::string ToString() const; bool Equals(const Action& other) const; - friend bool operator==(const Action& left, const Action& right) { - return left.Equals(right); - } - friend bool operator!=(const Action& left, const Action& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Action* out); }; /// \brief Opaque result returned after executing an action -struct ARROW_FLIGHT_EXPORT Result { +struct ARROW_FLIGHT_EXPORT Result : public internal::BaseType { std::shared_ptr body; + Result() = default; + Result(std::shared_ptr body) // NOLINT runtime/explicit + : body(std::move(body)) {} + std::string ToString() const; bool Equals(const Result& other) const; - friend bool operator==(const Result& left, const Result& right) { - return left.Equals(right); - } - friend bool operator!=(const Result& left, const Result& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Result* out); }; enum class CancelStatus { @@ -275,54 +347,64 @@ enum class CancelStatus { }; /// \brief The result of the CancelFlightInfo action. -struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult { - CancelStatus status; +struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult + : public internal::BaseType { + CancelStatus status = CancelStatus::kUnspecified; + + CancelFlightInfoResult() = default; + CancelFlightInfoResult(CancelStatus status) // NOLINT runtime/explicit + : status(status) {} std::string ToString() const; bool Equals(const CancelFlightInfoResult& other) const; - friend bool operator==(const CancelFlightInfoResult& left, - const CancelFlightInfoResult& right) { - return left.Equals(right); - } - friend bool operator!=(const CancelFlightInfoResult& left, - const CancelFlightInfoResult& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. 
- arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + CancelFlightInfoResult* out); }; ARROW_FLIGHT_EXPORT std::ostream& operator<<(std::ostream& os, CancelStatus status); /// \brief message for simple auth -struct ARROW_FLIGHT_EXPORT BasicAuth { +struct ARROW_FLIGHT_EXPORT BasicAuth : public internal::BaseType { std::string username; std::string password; + BasicAuth() = default; + BasicAuth(std::string username, std::string password) + : username(std::move(username)), password(std::move(password)) {} + std::string ToString() const; bool Equals(const BasicAuth& other) const; - friend bool operator==(const BasicAuth& left, const BasicAuth& right) { - return left.Equals(right); - } - friend bool operator!=(const BasicAuth& left, const BasicAuth& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; - /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, BasicAuth* out); }; /// \brief A request to retrieve or generate a dataset -struct ARROW_FLIGHT_EXPORT FlightDescriptor { +struct ARROW_FLIGHT_EXPORT FlightDescriptor + : public internal::BaseType { enum DescriptorType { UNKNOWN = 0, /// Unused PATH = 1, /// Named path identifying a dataset @@ -330,7 +412,7 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { }; /// The descriptor type - DescriptorType type; + DescriptorType type = UNKNOWN; /// Opaque value used to express a command. Should only be defined when type /// is CMD @@ -340,22 +422,34 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { /// when type is PATH std::vector path; + FlightDescriptor() = default; + + FlightDescriptor(DescriptorType type, std::string cmd, std::vector path) + : type(type), cmd(std::move(cmd)), path(std::move(path)) {} + bool Equals(const FlightDescriptor& other) const; /// \brief Get a human-readable form of this descriptor. std::string ToString() const; + using SuperT::Deserialize; + using SuperT::SerializeToString; + /// \brief Get the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Parse the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. 
- static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, FlightDescriptor* out); // Convenience factory functions @@ -366,41 +460,38 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { static FlightDescriptor Path(const std::vector& p) { return FlightDescriptor{PATH, "", p}; } - - friend bool operator==(const FlightDescriptor& left, const FlightDescriptor& right) { - return left.Equals(right); - } - friend bool operator!=(const FlightDescriptor& left, const FlightDescriptor& right) { - return !(left == right); - } }; /// \brief Data structure providing an opaque identifier or credential to use /// when requesting a data stream with the DoGet RPC -struct ARROW_FLIGHT_EXPORT Ticket { +struct ARROW_FLIGHT_EXPORT Ticket : public internal::BaseType { std::string ticket; + Ticket() = default; + Ticket(std::string ticket) // NOLINT runtime/explicit + : ticket(std::move(ticket)) {} + std::string ToString() const; bool Equals(const Ticket& other) const; - friend bool operator==(const Ticket& left, const Ticket& right) { - return left.Equals(right); - } - friend bool operator!=(const Ticket& left, const Ticket& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Get the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Parse the wire-format representation of this type. /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Ticket* out); }; class FlightClient; @@ -416,7 +507,7 @@ ARROW_FLIGHT_EXPORT extern const char* kSchemeGrpcTls; /// \brief A host location (a URI) -struct ARROW_FLIGHT_EXPORT Location { +struct ARROW_FLIGHT_EXPORT Location : public internal::BaseType { public: /// \brief Initialize a blank location. 
Location(); @@ -464,13 +555,6 @@ struct ARROW_FLIGHT_EXPORT Location { bool Equals(const Location& other) const; - friend bool operator==(const Location& left, const Location& right) { - return left.Equals(right); - } - friend bool operator!=(const Location& left, const Location& right) { - return !(left == right); - } - private: friend class FlightClient; friend class FlightServerBase; @@ -479,7 +563,7 @@ struct ARROW_FLIGHT_EXPORT Location { /// \brief A flight ticket and list of locations where the ticket can be /// redeemed -struct ARROW_FLIGHT_EXPORT FlightEndpoint { +struct ARROW_FLIGHT_EXPORT FlightEndpoint : public internal::BaseType { /// Opaque ticket identify; use with DoGet RPC Ticket ticket; @@ -496,45 +580,56 @@ struct ARROW_FLIGHT_EXPORT FlightEndpoint { /// Opaque Application-defined metadata std::string app_metadata; + FlightEndpoint() = default; + FlightEndpoint(Ticket ticket, std::vector locations, + std::optional expiration_time, std::string app_metadata) + : ticket(std::move(ticket)), + locations(std::move(locations)), + expiration_time(expiration_time), + app_metadata(std::move(app_metadata)) {} + std::string ToString() const; bool Equals(const FlightEndpoint& other) const; - friend bool operator==(const FlightEndpoint& left, const FlightEndpoint& right) { - return left.Equals(right); - } - friend bool operator!=(const FlightEndpoint& left, const FlightEndpoint& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, FlightEndpoint* out); }; /// \brief The request of the RenewFlightEndpoint action. -struct ARROW_FLIGHT_EXPORT RenewFlightEndpointRequest { +struct ARROW_FLIGHT_EXPORT RenewFlightEndpointRequest + : public internal::BaseType { FlightEndpoint endpoint; + RenewFlightEndpointRequest() = default; + explicit RenewFlightEndpointRequest(FlightEndpoint endpoint) + : endpoint(std::move(endpoint)) {} + std::string ToString() const; bool Equals(const RenewFlightEndpointRequest& other) const; - friend bool operator==(const RenewFlightEndpointRequest& left, - const RenewFlightEndpointRequest& right) { - return left.Equals(right); - } - friend bool operator!=(const RenewFlightEndpointRequest& left, - const RenewFlightEndpointRequest& right) { - return !(left == right); - } + using SuperT::Deserialize; + using SuperT::SerializeToString; /// \brief Serialize this message to its wire-format representation. - arrow::Result SerializeToString() const; + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize( - std::string_view serialized); + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. 
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   RenewFlightEndpointRequest* out);
 };

 /// \brief Staging data structure for messages about to be put on the wire
@@ -545,12 +640,19 @@ struct ARROW_FLIGHT_EXPORT FlightPayload {
   std::shared_ptr<Buffer> app_metadata;
   ipc::IpcPayload ipc_message;

+  FlightPayload() = default;
+  FlightPayload(std::shared_ptr<Buffer> descriptor, std::shared_ptr<Buffer> app_metadata,
+                ipc::IpcPayload ipc_message)
+      : descriptor(std::move(descriptor)),
+        app_metadata(std::move(app_metadata)),
+        ipc_message(std::move(ipc_message)) {}
+
   /// \brief Check that the payload can be written to the wire.
   Status Validate() const;
 };

 /// \brief Schema result returned after a schema request RPC
-struct ARROW_FLIGHT_EXPORT SchemaResult {
+struct ARROW_FLIGHT_EXPORT SchemaResult : public internal::BaseType<SchemaResult> {
  public:
   SchemaResult() = default;
   explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {}
@@ -570,18 +672,18 @@ struct ARROW_FLIGHT_EXPORT SchemaResult {
   std::string ToString() const;
   bool Equals(const SchemaResult& other) const;

-  friend bool operator==(const SchemaResult& left, const SchemaResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const SchemaResult& left, const SchemaResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<SchemaResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, SchemaResult* out);

  private:
   std::string raw_schema_;
@@ -589,7 +691,8 @@

 /// \brief The access coordinates for retrieval of a dataset, returned by
 /// GetFlightInfo
-class ARROW_FLIGHT_EXPORT FlightInfo {
+class ARROW_FLIGHT_EXPORT FlightInfo
+    : public internal::BaseType<FlightInfo, std::unique_ptr<FlightInfo>> {
  public:
   struct Data {
     std::string schema;
@@ -641,18 +744,25 @@ class ARROW_FLIGHT_EXPORT FlightInfo {
   /// Application-defined opaque metadata
   const std::string& app_metadata() const { return data_.app_metadata; }

+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;
+
   /// \brief Get the wire-format representation of this type.
   ///
   /// Useful when interoperating with non-Flight systems (e.g. REST
   /// services) that may want to return Flight types.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Parse the wire-format representation of this type.
   ///
   /// Useful when interoperating with non-Flight systems (e.g. REST
   /// services) that may want to return Flight types.
-  static arrow::Result<std::unique_ptr<FlightInfo>> Deserialize(
-      std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   std::unique_ptr<FlightInfo>* out);

   std::string ToString() const;

@@ -661,13 +771,6 @@ class ARROW_FLIGHT_EXPORT FlightInfo {
   /// the schemas.
  bool Equals(const FlightInfo& other) const;

-  friend bool operator==(const FlightInfo& left, const FlightInfo& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const FlightInfo& left, const FlightInfo& right) {
-    return !(left == right);
-  }
-
  private:
   Data data_;
   mutable std::shared_ptr<Schema> schema_;
@@ -675,7 +778,8 @@ class ARROW_FLIGHT_EXPORT FlightInfo {
 };

 /// \brief The information to process a long-running query.
-class ARROW_FLIGHT_EXPORT PollInfo {
+class ARROW_FLIGHT_EXPORT PollInfo
+    : public internal::BaseType<PollInfo, std::unique_ptr<PollInfo>> {
  public:
   /// The currently available results so far.
   std::unique_ptr<FlightInfo> info = NULLPTR;
@@ -696,22 +800,19 @@ class ARROW_FLIGHT_EXPORT PollInfo {
         progress(std::nullopt),
         expiration_time(std::nullopt) {}

-  explicit PollInfo(std::unique_ptr<FlightInfo> info,
-                    std::optional<FlightDescriptor> descriptor,
-                    std::optional<double> progress,
-                    std::optional<Timestamp> expiration_time)
+  PollInfo(std::unique_ptr<FlightInfo> info, std::optional<FlightDescriptor> descriptor,
+           std::optional<double> progress, std::optional<Timestamp> expiration_time)
       : info(std::move(info)),
         descriptor(std::move(descriptor)),
         progress(progress),
         expiration_time(expiration_time) {}

-  // Must not be explicit; to declare one we must declare all ("rule of five")
-  PollInfo(const PollInfo& other)  // NOLINT(runtime/explicit)
+  PollInfo(const PollInfo& other)
       : info(other.info ? std::make_unique<FlightInfo>(*other.info) : NULLPTR),
         descriptor(other.descriptor),
         progress(other.progress),
         expiration_time(other.expiration_time) {}
-  PollInfo(PollInfo&& other) noexcept = default;  // NOLINT(runtime/explicit)
+  PollInfo(PollInfo&& other) noexcept = default;
   ~PollInfo() = default;
   PollInfo& operator=(const PollInfo& other) {
     info = other.info ? std::make_unique<FlightInfo>(*other.info) : NULLPTR;
@@ -722,18 +823,25 @@
   }
   PollInfo& operator=(PollInfo&& other) = default;

+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;
+
   /// \brief Get the wire-format representation of this type.
   ///
   /// Useful when interoperating with non-Flight systems (e.g. REST
   /// services) that may want to return Flight types.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Parse the wire-format representation of this type.
   ///
   /// Useful when interoperating with non-Flight systems (e.g. REST
   /// services) that may want to return Flight types.
-  static arrow::Result<std::unique_ptr<PollInfo>> Deserialize(
-      std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   std::unique_ptr<PollInfo>* out);

   std::string ToString() const;

@@ -741,36 +849,33 @@
   /// serialized schema representations, NOT the logical equality of
   /// the schemas.
   bool Equals(const PollInfo& other) const;
-
-  friend bool operator==(const PollInfo& left, const PollInfo& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const PollInfo& left, const PollInfo& right) {
-    return !(left == right);
-  }
 };

 /// \brief The request of the CancelFlightInfoRequest action.
-struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest {
+struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest
+    : public internal::BaseType<CancelFlightInfoRequest> {
   std::unique_ptr<FlightInfo> info;

+  CancelFlightInfoRequest() = default;
+  CancelFlightInfoRequest(std::unique_ptr<FlightInfo> info)  // NOLINT runtime/explicit
+      : info(std::move(info)) {}
+
   std::string ToString() const;
   bool Equals(const CancelFlightInfoRequest& other) const;

-  friend bool operator==(const CancelFlightInfoRequest& left,
-                         const CancelFlightInfoRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CancelFlightInfoRequest& left,
-                         const CancelFlightInfoRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CancelFlightInfoRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   CancelFlightInfoRequest* out);
 };

 /// \brief Variant supporting all possible value types for {Set,Get}SessionOptions
@@ -821,30 +926,36 @@
 std::string ToString(const CloseSessionStatus& status);
 std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status);

 /// \brief A request to set a set of session options by name/value.
-struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest {
+struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest
+    : public internal::BaseType<SetSessionOptionsRequest> {
   std::map<std::string, SessionOptionValue> session_options;

+  SetSessionOptionsRequest() = default;
+  explicit SetSessionOptionsRequest(
+      std::map<std::string, SessionOptionValue> session_options)
+      : session_options(std::move(session_options)) {}
+
   std::string ToString() const;
   bool Equals(const SetSessionOptionsRequest& other) const;

-  friend bool operator==(const SetSessionOptionsRequest& left,
-                         const SetSessionOptionsRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const SetSessionOptionsRequest& left,
-                         const SetSessionOptionsRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<SetSessionOptionsRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   SetSessionOptionsRequest* out);
 };

 /// \brief The result(s) of setting session option(s).
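+///
+/// `errors` is expected to contain one entry per option that could not be
+/// set, keyed by option name; options that were applied successfully are
+/// not listed.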
-struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult {
+struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult
+    : public internal::BaseType<SetSessionOptionsResult> {
   struct Error {
     SetSessionOptionErrorValue value;
@@ -859,111 +970,125 @@ struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult {

   std::map<std::string, Error> errors;

+  SetSessionOptionsResult() = default;
+  SetSessionOptionsResult(std::map<std::string, Error> errors)  // NOLINT runtime/explicit
+      : errors(std::move(errors)) {}
+
   std::string ToString() const;
   bool Equals(const SetSessionOptionsResult& other) const;

-  friend bool operator==(const SetSessionOptionsResult& left,
-                         const SetSessionOptionsResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const SetSessionOptionsResult& left,
-                         const SetSessionOptionsResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<SetSessionOptionsResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   SetSessionOptionsResult* out);
 };

 /// \brief A request to get current session options.
-struct ARROW_FLIGHT_EXPORT GetSessionOptionsRequest {
+struct ARROW_FLIGHT_EXPORT GetSessionOptionsRequest
+    : public internal::BaseType<GetSessionOptionsRequest> {
+  GetSessionOptionsRequest() = default;
+
   std::string ToString() const;
   bool Equals(const GetSessionOptionsRequest& other) const;

-  friend bool operator==(const GetSessionOptionsRequest& left,
-                         const GetSessionOptionsRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const GetSessionOptionsRequest& left,
-                         const GetSessionOptionsRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<GetSessionOptionsRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   GetSessionOptionsRequest* out);
 };

 /// \brief The current session options.
-struct ARROW_FLIGHT_EXPORT GetSessionOptionsResult {
+struct ARROW_FLIGHT_EXPORT GetSessionOptionsResult
+    : public internal::BaseType<GetSessionOptionsResult> {
   std::map<std::string, SessionOptionValue> session_options;

+  GetSessionOptionsResult() = default;
+  GetSessionOptionsResult(  // NOLINT runtime/explicit
+      std::map<std::string, SessionOptionValue> session_options)
+      : session_options(std::move(session_options)) {}
+
   std::string ToString() const;
   bool Equals(const GetSessionOptionsResult& other) const;

-  friend bool operator==(const GetSessionOptionsResult& left,
-                         const GetSessionOptionsResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const GetSessionOptionsResult& left,
-                         const GetSessionOptionsResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<GetSessionOptionsResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized,
+                                   GetSessionOptionsResult* out);
 };

 /// \brief A request to close the open client session.
-struct ARROW_FLIGHT_EXPORT CloseSessionRequest {
+struct ARROW_FLIGHT_EXPORT CloseSessionRequest
+    : public internal::BaseType<CloseSessionRequest> {
+  CloseSessionRequest() = default;
+
   std::string ToString() const;
   bool Equals(const CloseSessionRequest& other) const;

-  friend bool operator==(const CloseSessionRequest& left,
-                         const CloseSessionRequest& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CloseSessionRequest& left,
-                         const CloseSessionRequest& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CloseSessionRequest> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, CloseSessionRequest* out);
 };

 /// \brief The result of attempting to close the client session.
-struct ARROW_FLIGHT_EXPORT CloseSessionResult {
+struct ARROW_FLIGHT_EXPORT CloseSessionResult
+    : public internal::BaseType<CloseSessionResult> {
   CloseSessionStatus status;

+  CloseSessionResult() = default;
+  CloseSessionResult(CloseSessionStatus status)  // NOLINT runtime/explicit
+      : status(status) {}
+
   std::string ToString() const;
   bool Equals(const CloseSessionResult& other) const;

-  friend bool operator==(const CloseSessionResult& left,
-                         const CloseSessionResult& right) {
-    return left.Equals(right);
-  }
-  friend bool operator!=(const CloseSessionResult& left,
-                         const CloseSessionResult& right) {
-    return !(left == right);
-  }
+  using SuperT::Deserialize;
+  using SuperT::SerializeToString;

   /// \brief Serialize this message to its wire-format representation.
-  arrow::Result<std::string> SerializeToString() const;
+  ///
+  /// Use `SerializeToString()` if you want a Result-returning version.
+  arrow::Status SerializeToString(std::string* out) const;

   /// \brief Deserialize this message from its wire-format representation.
-  static arrow::Result<CloseSessionResult> Deserialize(std::string_view serialized);
+  ///
+  /// Use `Deserialize(serialized)` if you want a Result-returning version.
+  static arrow::Status Deserialize(std::string_view serialized, CloseSessionResult* out);
 };

 /// \brief An iterator to FlightInfo instances returned by ListFlights.

From 299ad7086928c555f3e861c20327276d1c4f2557 Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Fri, 19 Jul 2024 15:52:18 +1200
Subject: [PATCH 094/122] GH-43266: [C#] Add LargeBinary, LargeString and LargeList array types (#43269)

### Rationale for this change

See #43266. Note that LargeBinary and LargeString are still limited to 2 GiB buffers, and LargeList is limited to offsets that can be represented as int32.

### What changes are included in this PR?
* Add new Array subtypes: LargeBinaryArray, LargeStringArray and LargeListArray
* Support round-tripping these array types via the IPC format
* Support round-tripping these array types via the C Data Interface
* Improve error messages when importing arrays that are too large via IPC or the C Data Interface
* Enable integration tests for the new types
* Update documentation

### Are these changes tested?

Yes, I've added some basic tests specifically for the new array types, and added these to the test data generator so they're covered by the existing tests for round-tripping using IPC and the C Data Interface.

### Are there any user-facing changes?

Yes, this is a new user-facing feature.

### Implementation notes

* I haven't added builders for these new array types. Given they're added to help with interoperability with other libraries, I wouldn't expect .NET users to build arrays of these types, as they provide no other benefit over the non-large types until we have proper large memory support. But I'm happy to add builders if they would be useful.
* The new array types share a lot of logic with the non-large types. I considered consolidating this logic by adding a new `BinaryArrayBase` class, for example, but that would require generic math support to work nicely, and would still complicate the code quite a bit and add extra virtual method call overhead. So I think it's fine to keep these new Array subtypes independent from the non-large types.
* I haven't included support for materializing a LargeStringArray (see #41048). I'm not sure whether there would be a use for this, but it could be added later if needed.
* GitHub Issue: #43266

Authored-by: Adam Reeve
Signed-off-by: Curt Hagenlocher
---
 csharp/README.md                              |   3 +-
 .../Apache.Arrow/Arrays/ArrowArrayFactory.cs  |   6 +
 .../Apache.Arrow/Arrays/LargeBinaryArray.cs   | 154 ++++++++++++++++++
 .../src/Apache.Arrow/Arrays/LargeListArray.cs |  97 +++++++++
 .../Apache.Arrow/Arrays/LargeStringArray.cs   | 113 +++++++++++++
 .../src/Apache.Arrow/C/CArrowArrayImporter.cs |  69 ++++++++
 .../Apache.Arrow/C/CArrowSchemaExporter.cs    |   5 +-
 .../Apache.Arrow/C/CArrowSchemaImporter.cs    |  14 +-
 .../Ipc/ArrowReaderImplementation.cs          |   2 +
 .../Ipc/ArrowStreamReaderImplementation.cs    |   8 +-
 .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs |  76 +++++++++
 .../Ipc/ArrowTypeFlatbufferBuilder.cs         |  29 +++-
 .../src/Apache.Arrow/Ipc/MessageSerializer.cs |  10 ++
 csharp/src/Apache.Arrow/Types/IArrowType.cs   |   3 +
 .../src/Apache.Arrow/Types/LargeBinaryType.cs |  27 +++
 .../src/Apache.Arrow/Types/LargeListType.cs   |  36 ++++
 .../src/Apache.Arrow/Types/LargeStringType.cs |  27 +++
 .../Apache.Arrow.IntegrationTest/JsonFile.cs  |  88 ++++++++++
 .../Apache.Arrow.Tests/ArrowReaderVerifier.cs |  78 +++++++++
 .../LargeBinaryArrayTests.cs                  |  95 +++++++++++
 .../Apache.Arrow.Tests/LargeListArrayTests.cs | 105 ++++++++++++
 .../LargeStringArrayTests.cs                  |  91 +++++++++++
 csharp/test/Apache.Arrow.Tests/TableTests.cs  |   4 +-
 csharp/test/Apache.Arrow.Tests/TestData.cs    | 118 ++++++++++++++
 dev/archery/archery/integration/datagen.py    |   4 +-
 docs/source/status.rst                        |   8 +-
 26 files changed, 1254 insertions(+), 16 deletions(-)
 create mode 100644 csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
 create mode 100644 csharp/src/Apache.Arrow/Arrays/LargeListArray.cs
 create mode 100644 csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs
 create mode 100644 csharp/src/Apache.Arrow/Types/LargeBinaryType.cs
 create mode 100644 csharp/src/Apache.Arrow/Types/LargeListType.cs
 create mode 100644 csharp/src/Apache.Arrow/Types/LargeStringType.cs
 create mode 100644 csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs
 create mode 100644 csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs
 create mode 100644 csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs

diff --git a/csharp/README.md b/csharp/README.md
index b36eb899db2d5..663aaf8ab243c 100644
--- a/csharp/README.md
+++ b/csharp/README.md
@@ -129,7 +129,8 @@
 - Types
   - Tensor
 - Arrays
-  - Large Arrays
+  - Large Arrays. There are large array types provided to help with interoperability with other libraries,
+    but these do not support buffers larger than 2 GiB and an exception will be raised when trying to import an array that is too large.
   - Large Binary
   - Large List
   - Large String

diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 67c4b21a2e531..bd06c3a1b8b14 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -53,18 +53,24 @@ public static IArrowArray BuildArray(ArrayData data)
                     return new StringArray(data);
                 case ArrowTypeId.StringView:
                     return new StringViewArray(data);
+                case ArrowTypeId.LargeString:
+                    return new LargeStringArray(data);
                 case ArrowTypeId.FixedSizedBinary:
                     return new FixedSizeBinaryArray(data);
                 case ArrowTypeId.Binary:
                     return new BinaryArray(data);
                 case ArrowTypeId.BinaryView:
                     return new BinaryViewArray(data);
+                case ArrowTypeId.LargeBinary:
+                    return new LargeBinaryArray(data);
                 case ArrowTypeId.Timestamp:
                     return new TimestampArray(data);
                 case ArrowTypeId.List:
                     return new ListArray(data);
                 case ArrowTypeId.ListView:
                     return new ListViewArray(data);
+                case ArrowTypeId.LargeList:
+                    return new LargeListArray(data);
                 case ArrowTypeId.Map:
                     return new MapArray(data);
                 case ArrowTypeId.Struct:

diff --git a/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
new file mode 100644
index 0000000000000..9eddbedab54ed
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
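+
+// Layout note: a LargeBinaryArray carries three buffers (validity bitmap,
+// 64-bit value offsets, value data). Offsets are stored as long, but values
+// are sliced with checked int casts, so any value that extends past 2 GiB
+// raises an OverflowException instead of returning corrupt data.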
+ +using Apache.Arrow.Types; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace Apache.Arrow; + +public class LargeBinaryArray : Array, IReadOnlyList, ICollection +{ + public LargeBinaryArray(ArrayData data) + : base(data) + { + data.EnsureDataType(ArrowTypeId.LargeBinary); + data.EnsureBufferCount(3); + } + + public LargeBinaryArray(ArrowTypeId typeId, ArrayData data) + : base(data) + { + data.EnsureDataType(typeId); + data.EnsureBufferCount(3); + } + + public LargeBinaryArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ArrowBuffer ValueBuffer => Data.Buffers[2]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(Offset, Length + 1); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + if (!IsValid(index)) + { + return 0; + } + + ReadOnlySpan offsets = ValueOffsets; + return checked((int)(offsets[index + 1] - offsets[index])); + } + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// + /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte + /// collection values when seen in the context of this method's return type of . + /// Use the method or the overload instead + /// to reliably determine null values. + /// + /// Index at which to get bytes. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index) => GetBytes(index, out _); + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// Index at which to get bytes. + /// Set to if the value at the given index is null. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index, out bool isNull) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + isNull = IsNull(index); + + if (isNull) + { + // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span + // is actually returned as an empty span. 
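+            // Callers that need to distinguish a null entry from an empty
+            // value should therefore check the isNull out parameter rather
+            // than the length of the returned span.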
+ return ReadOnlySpan.Empty; + } + + var offset = checked((int)ValueOffsets[index]); + return ValueBuffer.Span.Slice(offset, GetValueLength(index)); + } + + int IReadOnlyCollection.Count => Length; + + byte[] IReadOnlyList.this[int index] => GetBytes(index).ToArray(); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetBytes(index).ToArray(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(byte[] item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(byte[] item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(byte[] item) + { + for (int index = 0; index < Length; index++) + { + if (GetBytes(index).SequenceEqual(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(byte[][] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetBytes(srcIndex).ToArray(); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs new file mode 100644 index 0000000000000..6e37aa4c63536 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/LargeListArray.cs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
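+
+// Layout note: a LargeListArray carries a validity bitmap and a 64-bit
+// offsets buffer, with the list values stored in a single child array. As
+// with LargeBinaryArray, offsets are narrowed with checked int casts, so
+// lists whose values extend past int.MaxValue raise an OverflowException.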
+ +using System; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class LargeListArray : Array + { + public IArrowArray Values { get; } + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(Offset, Length + 1); + + public LargeListArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, IArrowArray values, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer }, new[] { values.Data }), + values) + { + } + + public LargeListArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) + { + } + + private LargeListArray(ArrayData data, IArrowArray values) : base(data) + { + data.EnsureBufferCount(2); + data.EnsureDataType(ArrowTypeId.LargeList); + Values = values; + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return 0; + } + + ReadOnlySpan offsets = ValueOffsets; + return checked((int)(offsets[index + 1] - offsets[index])); + } + + public IArrowArray GetSlicedValues(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return null; + } + + if (!(Values is Array array)) + { + return default; + } + + return array.Slice(checked((int)ValueOffsets[index]), GetValueLength(index)); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + Values?.Dispose(); + } + base.Dispose(disposing); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs b/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs new file mode 100644 index 0000000000000..2a65b828acfa1 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/LargeStringArray.cs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
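+
+// A LargeStringArray is a LargeBinaryArray whose values are UTF-8 encoded
+// strings. A minimal read sketch (variable names are illustrative):
+//
+//   var strings = (LargeStringArray)ArrowArrayFactory.BuildArray(arrayData);
+//   for (int i = 0; i < strings.Length; i++)
+//   {
+//       string value = strings.GetString(i);  // returns null for null entries
+//   }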
+ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using Apache.Arrow.Types; + +namespace Apache.Arrow; + +public class LargeStringArray: LargeBinaryArray, IReadOnlyList, ICollection +{ + public static readonly Encoding DefaultEncoding = StringArray.DefaultEncoding; + + public LargeStringArray(ArrayData data) + : base(ArrowTypeId.LargeString, data) { } + + public LargeStringArray(int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(LargeStringType.Default, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + /// + /// Get the string value at the given index + /// + /// Input index + /// Optional: the string encoding, default is UTF8 + /// The string object at the given index + public string GetString(int index, Encoding encoding = default) + { + encoding ??= DefaultEncoding; + + ReadOnlySpan bytes = GetBytes(index, out bool isNull); + + if (isNull) + { + return null; + } + + if (bytes.Length == 0) + { + return string.Empty; + } + + unsafe + { + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + { + return encoding.GetString(data, bytes.Length); + } + } + } + + + int IReadOnlyCollection.Count => Length; + + string IReadOnlyList.this[int index] => GetString(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetString(index); + }; + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(string item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(string item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(string item) + { + for (int index = 0; index < Length; index++) + { + if (GetString(index) == item) + return true; + } + + return false; + } + + void ICollection.CopyTo(string[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetString(srcIndex); + } + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index abe02dcbb591f..68b67f3d7c620 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -162,6 +162,10 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) case ArrowTypeId.BinaryView: buffers = ImportByteArrayViewBuffers(cArray); break; + case ArrowTypeId.LargeString: + case ArrowTypeId.LargeBinary: + buffers = ImportLargeByteArrayBuffers(cArray); + break; case ArrowTypeId.List: children = ProcessListChildren(cArray, ((ListType)type).ValueDataType); buffers = ImportListBuffers(cArray); @@ -170,6 +174,10 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) children = ProcessListChildren(cArray, ((ListViewType)type).ValueDataType); buffers = ImportListViewBuffers(cArray); break; + case ArrowTypeId.LargeList: + children = ProcessListChildren(cArray, ((LargeListType)type).ValueDataType); + buffers = 
ImportLargeListBuffers(cArray); + break; case ArrowTypeId.FixedSizeList: children = ProcessListChildren(cArray, ((FixedSizeListType)type).ValueDataType); buffers = ImportFixedSizeListBuffers(cArray); @@ -313,6 +321,42 @@ private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportLargeByteArrayBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 3) + { + throw new InvalidOperationException("Large byte arrays are expected to have exactly three buffers"); + } + + const int maxLength = int.MaxValue / 8 - 1; + if (cArray->length > maxLength) + { + throw new OverflowException( + $"Cannot import large byte array. Array length {cArray->length} " + + $"is greater than the maximum supported large byte array length ({maxLength})"); + } + + int length = (int)cArray->length; + int offsetsLength = (length + 1) * 8; + long* offsets = (long*)cArray->buffers[1]; + Debug.Assert(offsets != null); + long valuesLength = offsets[length]; + + if (valuesLength > int.MaxValue) + { + throw new OverflowException( + $"Cannot import large byte array. Data length {valuesLength} " + + $"is greater than the maximum supported large byte array data length ({int.MaxValue})"); + } + + ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + buffers[2] = ImportCArrayBuffer(cArray, 2, (int)valuesLength); + + return buffers; + } + private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 2) @@ -348,6 +392,31 @@ private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportLargeListBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 2) + { + throw new InvalidOperationException("Large list arrays are expected to have exactly two buffers"); + } + + const int maxLength = int.MaxValue / 8 - 1; + if (cArray->length > maxLength) + { + throw new OverflowException( + $"Cannot import large list array. 
Array length {cArray->length} " + + $"is greater than the maximum supported large list array length ({maxLength})"); + } + + int length = (int)cArray->length; + int offsetsLength = (length + 1) * 8; + + ArrowBuffer[] buffers = new ArrowBuffer[2]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = ImportCArrayBuffer(cArray, 1, offsetsLength); + + return buffers; + } + private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 1) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index 3bb7134af3ba9..92d48a2d70880 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -168,8 +168,10 @@ private static string GetFormat(IArrowType datatype) // Binary case BinaryType _: return "z"; case BinaryViewType _: return "vz"; + case LargeBinaryType _: return "Z"; case StringType _: return "u"; case StringViewType _: return "vu"; + case LargeStringType _: return "U"; case FixedSizeBinaryType binaryType: return $"w:{binaryType.ByteWidth}"; // Date @@ -199,6 +201,7 @@ private static string GetFormat(IArrowType datatype) // Nested case ListType _: return "+l"; case ListViewType _: return "+vl"; + case LargeListType _: return "+L"; case FixedSizeListType fixedListType: return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; @@ -208,7 +211,7 @@ private static string GetFormat(IArrowType datatype) case DictionaryType dictionaryType: return GetFormat(dictionaryType.IndexType); default: throw new NotImplementedException($"Exporting {datatype.Name} not implemented"); - }; + } } private static long GetFlags(IArrowType datatype, bool nullable = true) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index f1acc007bcef7..94177184dea00 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -165,7 +165,7 @@ public ArrowType GetAsType() } // Special handling for nested types - if (format == "+l" || format == "+vl") + if (format == "+l" || format == "+vl" || format == "+L") { if (_cSchema->n_children != 1) { @@ -180,7 +180,13 @@ public ArrowType GetAsType() Field childField = childSchema.GetAsField(); - return format[1] == 'v' ? 
new ListViewType(childField) : new ListType(childField); + return format[1] switch + { + 'l' => new ListType(childField), + 'v' => new ListViewType(childField), + 'L' => new LargeListType(childField), + _ => throw new InvalidDataException($"Invalid format for list: '{format}'"), + }; } else if (format == "+s") { @@ -304,10 +310,10 @@ public ArrowType GetAsType() // Binary data "z" => BinaryType.Default, "vz" => BinaryViewType.Default, - //"Z" => new LargeBinaryType() // Not yet implemented + "Z" => LargeBinaryType.Default, "u" => StringType.Default, "vu" => StringViewType.Default, - //"U" => new LargeStringType(), // Not yet implemented + "U" => LargeStringType.Default, // Date and time "tdD" => Date32Type.Default, "tdm" => Date64Type.Default, diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index a37c501072f4b..7e766677f8b28 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -291,6 +291,8 @@ private ArrayData LoadField( break; case ArrowTypeId.String: case ArrowTypeId.Binary: + case ArrowTypeId.LargeString: + case ArrowTypeId.LargeBinary: case ArrowTypeId.ListView: buffers = 3; break; diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs index 5583a58487bf5..12a2a17cf04e2 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReaderImplementation.cs @@ -132,7 +132,13 @@ protected ReadResult ReadMessage() Flatbuf.Message message = Flatbuf.Message.GetRootAsMessage(CreateByteBuffer(messageBuff)); - int bodyLength = checked((int)message.BodyLength); + if (message.BodyLength > int.MaxValue) + { + throw new OverflowException( + $"Arrow IPC message body length ({message.BodyLength}) is larger than " + + $"the maximum supported message size ({int.MaxValue})"); + } + int bodyLength = (int)message.BodyLength; IMemoryOwner bodyBuffOwner = _allocator.Allocate(bodyLength); Memory bodyBuff = bodyBuffOwner.Memory.Slice(0, bodyLength); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index c66569afeba85..eaa8471fa7bd3 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -57,11 +57,14 @@ private class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -199,6 +202,28 @@ public void Visit(ListViewArray array) VisitArray(values); } + public void Visit(LargeListArray array) + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBuffer(GetZeroBasedLongValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); + + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = checked((int)array.ValueOffsets[0]); + valuesLength = checked((int)array.ValueOffsets[array.Length] - valuesOffset); + } + + var values = array.Values; + if (valuesOffset > 0 || valuesLength < values.Length) + { + values = ArrowArrayFactory.Slice(values, valuesOffset, valuesLength); + } + + VisitArray(values); + } + public void 
Visit(FixedSizeListArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); @@ -214,6 +239,8 @@ public void Visit(FixedSizeListArray array) public void Visit(StringViewArray array) => Visit(array as BinaryViewArray); + public void Visit(LargeStringArray array) => Visit(array as LargeBinaryArray); + public void Visit(BinaryArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); @@ -242,6 +269,22 @@ public void Visit(BinaryViewArray array) VariadicCounts.Add(array.DataBufferCount); } + public void Visit(LargeBinaryArray array) + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBuffer(GetZeroBasedLongValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length))); + + int valuesOffset = 0; + int valuesLength = 0; + if (array.Length > 0) + { + valuesOffset = checked((int)array.ValueOffsets[0]); + valuesLength = checked((int)array.ValueOffsets[array.Length]) - valuesOffset; + } + + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, valuesOffset, valuesLength)); + } + public void Visit(FixedSizeBinaryArray array) { var itemSize = ((FixedSizeBinaryType)array.Data.DataType).ByteWidth; @@ -327,6 +370,39 @@ private ArrowBuffer GetZeroBasedValueOffsets(ArrowBuffer valueOffsetsBuffer, int } } + private ArrowBuffer GetZeroBasedLongValueOffsets(ArrowBuffer valueOffsetsBuffer, int arrayOffset, int arrayLength) + { + var requiredBytes = CalculatePaddedBufferLength(checked(sizeof(long) * (arrayLength + 1))); + + if (arrayOffset != 0) + { + // Array has been sliced, so we need to shift and adjust the offsets + var originalOffsets = valueOffsetsBuffer.Span.CastTo().Slice(arrayOffset, arrayLength + 1); + var firstOffset = arrayLength > 0 ? 
originalOffsets[0] : 0L; + + var newValueOffsetsBuffer = _allocator.Allocate(requiredBytes); + var newValueOffsets = newValueOffsetsBuffer.Memory.Span.CastTo(); + + for (int i = 0; i < arrayLength + 1; ++i) + { + newValueOffsets[i] = originalOffsets[i] - firstOffset; + } + + return new ArrowBuffer(newValueOffsetsBuffer); + } + else if (valueOffsetsBuffer.Length > requiredBytes) + { + // Array may have been sliced but the offset is zero, + // so we can truncate the existing offsets + return new ArrowBuffer(valueOffsetsBuffer.Memory.Slice(0, requiredBytes)); + } + else + { + // Use the full buffer + return valueOffsetsBuffer; + } + } + private (ArrowBuffer Buffer, int minOffset, int maxEnd) GetZeroBasedListViewOffsets(ListViewArray array) { if (array.Length == 0) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 473e18968f8cb..adc229a051227 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -57,6 +57,7 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -65,9 +66,11 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -120,6 +123,14 @@ public void Visit(BinaryViewType type) Flatbuf.Type.BinaryView, offset); } + public void Visit(LargeBinaryType type) + { + Flatbuf.LargeBinary.StartLargeBinary(Builder); + Offset offset = Flatbuf.LargeBinary.EndLargeBinary(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeBinary, offset); + } + public void Visit(ListType type) { Flatbuf.List.StartList(Builder); @@ -136,6 +147,14 @@ public void Visit(ListViewType type) Flatbuf.ListView.EndListView(Builder)); } + public void Visit(LargeListType type) + { + Flatbuf.LargeList.StartLargeList(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeList, + Flatbuf.LargeList.EndLargeList(Builder)); + } + public void Visit(FixedSizeListType type) { Result = FieldType.Build( @@ -166,6 +185,14 @@ public void Visit(StringViewType type) Flatbuf.Type.Utf8View, offset); } + public void Visit(LargeStringType type) + { + Flatbuf.LargeUtf8.StartLargeUtf8(Builder); + Offset offset = Flatbuf.LargeUtf8.EndLargeUtf8(Builder); + Result = FieldType.Build( + Flatbuf.Type.LargeUtf8, offset); + } + public void Visit(TimestampType type) { StringOffset timezoneStringOffset = default; @@ -363,7 +390,7 @@ private static Flatbuf.IntervalUnit ToFlatBuffer(Types.IntervalUnit unit) Types.IntervalUnit.DayTime => Flatbuf.IntervalUnit.DAY_TIME, Types.IntervalUnit.MonthDayNanosecond => Flatbuf.IntervalUnit.MONTH_DAY_NANO, _ => throw new ArgumentException($"unsupported interval unit <{unit}>", nameof(unit)) - }; ; + }; } } } diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 0e6f330aef091..8e15632c517e1 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -186,6 +186,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.StringType.Default; case Flatbuf.Type.Utf8View: return Types.StringViewType.Default; + case Flatbuf.Type.LargeUtf8: + return Types.LargeStringType.Default; case 
Flatbuf.Type.FixedSizeBinary: Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = field.Type().Value; return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth); @@ -193,6 +195,8 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.BinaryType.Default; case Flatbuf.Type.BinaryView: return Types.BinaryViewType.Default; + case Flatbuf.Type.LargeBinary: + return Types.LargeBinaryType.Default; case Flatbuf.Type.List: if (childFields == null || childFields.Length != 1) { @@ -205,6 +209,12 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c throw new InvalidDataException($"List view type must have exactly one child."); } return new Types.ListViewType(childFields[0]); + case Flatbuf.Type.LargeList: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"Large list type must have exactly one child."); + } + return new Types.LargeListType(childFields[0]); case Flatbuf.Type.FixedSizeList: if (childFields == null || childFields.Length != 1) { diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index cf520391fe1e6..7a3159a1bbccd 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -53,6 +53,9 @@ public enum ArrowTypeId BinaryView, StringView, ListView, + LargeList, + LargeBinary, + LargeString, } public interface IArrowType diff --git a/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs b/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs new file mode 100644 index 0000000000000..e22c333824480 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeBinaryType.cs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types; + +public class LargeBinaryType: ArrowType +{ + public static readonly LargeBinaryType Default = new LargeBinaryType(); + + public override ArrowTypeId TypeId => ArrowTypeId.LargeBinary; + + public override string Name => "large_binary"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); +} diff --git a/csharp/src/Apache.Arrow/Types/LargeListType.cs b/csharp/src/Apache.Arrow/Types/LargeListType.cs new file mode 100644 index 0000000000000..2fe8166972931 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeListType.cs @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types +{ + public sealed class LargeListType : NestedType + { + public override ArrowTypeId TypeId => ArrowTypeId.LargeList; + + public override string Name => "large_list"; + + public Field ValueField => Fields[0]; + + public IArrowType ValueDataType => Fields[0].DataType; + + public LargeListType(Field valueField) + : base(valueField) { } + + public LargeListType(IArrowType valueDataType) + : this(new Field("item", valueDataType, true)) { } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/LargeStringType.cs b/csharp/src/Apache.Arrow/Types/LargeStringType.cs new file mode 100644 index 0000000000000..8698ca4747a0e --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/LargeStringType.cs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
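+
+// Maps to the Arrow "large_utf8" logical type. Over the C Data Interface
+// this type is exported and imported with the format string "U" (see
+// CArrowSchemaExporter and CArrowSchemaImporter in this change).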
+ +namespace Apache.Arrow.Types; + +public sealed class LargeStringType : ArrowType +{ + public static readonly LargeStringType Default = new LargeStringType(); + + public override ArrowTypeId TypeId => ArrowTypeId.LargeString; + + public override string Name => "large_utf8"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); +} diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 7232f74b8bec6..c9e44b8d2f491 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -177,8 +177,10 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "decimal" => ToDecimalArrowType(type), "binary" => BinaryType.Default, "binaryview" => BinaryViewType.Default, + "largebinary" => LargeBinaryType.Default, "utf8" => StringType.Default, "utf8view" => StringViewType.Default, + "largeutf8" => LargeStringType.Default, "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), @@ -188,6 +190,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "timestamp" => ToTimestampArrowType(type), "list" => ToListArrowType(type, children), "listview" => ToListViewArrowType(type, children), + "largelist" => ToLargeListArrowType(type, children), "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), @@ -303,6 +306,11 @@ private static IArrowType ToListViewArrowType(JsonArrowType type, Field[] childr return new ListViewType(children[0]); } + private static IArrowType ToLargeListArrowType(JsonArrowType type, Field[] children) + { + return new LargeListType(children[0]); + } + private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children) { return new FixedSizeListType(children[0], type.ListSize); @@ -461,11 +469,14 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -696,6 +707,24 @@ public void Visit(StringViewType type) Array = new StringViewArray(arrayData); } + public void Visit(LargeStringType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var json = JsonFieldData.Data.GetRawText(); + string[] values = JsonSerializer.Deserialize(json, s_options); + + ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); + foreach (string value in values) + { + valueBuilder.Append(Encoding.UTF8.GetBytes(value)); + } + ArrowBuffer valueBuffer = valueBuilder.Build(default); + + Array = new LargeStringArray(JsonFieldData.Count, offsetBuffer, valueBuffer, validityBuffer, nullCount); + } + public void Visit(BinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -747,6 +776,25 @@ public void Visit(BinaryViewType type) Array = new BinaryViewArray(arrayData); } + public void Visit(LargeBinaryType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var json = JsonFieldData.Data.GetRawText(); + string[] values = JsonSerializer.Deserialize(json, s_options); + + 
ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); + foreach (string value in values) + { + valueBuilder.Append(ConvertHexStringToByteArray(value)); + } + ArrowBuffer valueBuffer = valueBuilder.Build(default); + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, new[] { validityBuffer, offsetBuffer, valueBuffer }); + Array = new LargeBinaryArray(arrayData); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -796,6 +844,21 @@ public void Visit(ListViewType type) Array = new ListViewArray(arrayData); } + public void Visit(LargeListType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetLargeOffsetBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.ValueDataType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer }, new[] { Array.Data }); + Array = new LargeListArray(arrayData); + } + public void Visit(FixedSizeListType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -975,6 +1038,13 @@ private ArrowBuffer GetOffsetBuffer() return valueOffsets.Build(default); } + private ArrowBuffer GetLargeOffsetBuffer() + { + ArrowBuffer.Builder valueOffsets = new ArrowBuffer.Builder(JsonFieldData.Offset.Count); + valueOffsets.AppendRange(JsonFieldData.LongOffset); + return valueOffsets.Build(default); + } + private ArrowBuffer GetSizeBuffer() { ArrowBuffer.Builder valueSizes = new ArrowBuffer.Builder(JsonFieldData.Size.Count); @@ -1039,6 +1109,12 @@ public IEnumerable IntOffset get { return Offset.Select(GetInt); } } + [JsonIgnore] + public IEnumerable LongOffset + { + get { return Offset.Select(GetLong); } + } + [JsonIgnore] public IEnumerable IntSize { @@ -1056,6 +1132,18 @@ static int GetInt(JsonNode node) return int.Parse(node.GetValue()); } } + + static long GetLong(JsonNode node) + { + try + { + return node.GetValue(); + } + catch + { + return long.Parse(node.GetValue()); + } + } } public class JsonView diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 5c33d1fd43986..85f7b75f931ef 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -95,12 +95,15 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -144,14 +147,17 @@ public ArrayComparer(IArrowArray expectedArray, bool strictCompare) public void Visit(MonthDayNanosecondIntervalArray array) => CompareArrays(array); public void Visit(ListArray array) => CompareArrays(array); public void Visit(ListViewArray array) => CompareArrays(array); + public void Visit(LargeListArray array) => CompareArrays(array); public void Visit(FixedSizeListArray array) => CompareArrays(array); public void Visit(FixedSizeBinaryArray array) => CompareArrays(array); public void Visit(Decimal128Array array) => CompareArrays(array); public void Visit(Decimal256Array array) => CompareArrays(array); public void Visit(StringArray array) => CompareBinaryArrays(array); public void Visit(StringViewArray array) => 
CompareVariadicArrays(array); + public void Visit(LargeStringArray array) => CompareLargeBinaryArrays(array); public void Visit(BinaryArray array) => CompareBinaryArrays(array); public void Visit(BinaryViewArray array) => CompareVariadicArrays(array); + public void Visit(LargeBinaryArray array) => CompareLargeBinaryArrays(array); public void Visit(StructArray array) { @@ -276,6 +282,40 @@ private void CompareBinaryArrays(BinaryArray actualArray) } } + private void CompareLargeBinaryArrays(LargeBinaryArray actualArray) + where T : IArrowArray + { + Assert.IsAssignableFrom(_expectedArray); + Assert.IsAssignableFrom(actualArray); + + var expectedArray = (LargeBinaryArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + + CompareValidityBuffer( + expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, + expectedArray.Offset, actualArray.NullBitmapBuffer, actualArray.Offset); + + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + Assert.True(expectedArray.ValueBuffer.Span.Slice(0, expectedArray.Length).SequenceEqual(actualArray.ValueBuffer.Span.Slice(0, actualArray.Length))); + } + else + { + for (int i = 0; i < expectedArray.Length; i++) + { + Assert.True( + expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)), + $"LargeBinaryArray values do not match at index {i}."); + } + } + } + private void CompareVariadicArrays(BinaryViewArray actualArray) where T : IArrowArray { @@ -469,6 +509,44 @@ private void CompareArrays(ListViewArray actualArray) } } + private void CompareArrays(LargeListArray actualArray) + { + Assert.IsAssignableFrom(_expectedArray); + LargeListArray expectedArray = (LargeListArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + + CompareValidityBuffer( + expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, + expectedArray.Offset, actualArray.NullBitmapBuffer, actualArray.Offset); + + if (_strictCompare) + { + Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); + } + else + { + for (int i = 0; i < actualArray.Length; ++i) + { + if (expectedArray.IsNull(i)) + { + Assert.True(actualArray.IsNull(i)); + } + else + { + var expectedList = expectedArray.GetSlicedValues(i); + var actualList = actualArray.GetSlicedValues(i); + actualList.Accept(new ArrayComparer(expectedList, _strictCompare)); + } + } + } + } + private void CompareArrays(FixedSizeListArray actualArray) { Assert.IsAssignableFrom(_expectedArray); diff --git a/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs new file mode 100644 index 0000000000000..4ee1f1d0e0ffa --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeBinaryArrayTests.cs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections.Generic; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeBinaryArrayTests +{ + [Fact] + public void GetBytesReturnsCorrectValue() + { + var byteArrays = new byte[][] + { + new byte[] {0, 1, 2, 255}, + new byte[] {3, 4, 5}, + new byte[] {}, + null, + new byte[] {254, 253, 252}, + }; + var array = BuildArray(byteArrays); + + Assert.Equal(array.Length, byteArrays.Length); + for (var i = 0; i < byteArrays.Length; ++i) + { + var byteSpan = array.GetBytes(i, out var isNull); + var byteArray = isNull ? null : byteSpan.ToArray(); + Assert.Equal(byteArrays[i], byteArray); + } + } + + [Fact] + public void GetBytesChecksForOffsetOverflow() + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeBinaryArray( + LargeBinaryType.Default, length: 1, + offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetBytes(0)); + } + + private static LargeBinaryArray BuildArray(IReadOnlyCollection byteArrays) + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var bytes in byteArrays) + { + if (bytes == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + valueBuffer.Append(bytes); + offset += bytes.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeBinaryArray( + LargeBinaryType.Default, byteArrays.Count, + offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs new file mode 100644 index 0000000000000..1d35a8ffd62c5 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeListArrayTests.cs @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeListArrayTests +{ + [Fact] + public void GetSlicedValuesReturnsCorrectValues() + { + var values = new int?[][] + { + new int?[] {0, 1, 2}, + System.Array.Empty(), + null, + new int?[] {3, 4, null, 6}, + }; + + var array = BuildArray(values); + + Assert.Equal(values.Length, array.Length); + for (int i = 0; i < values.Length; ++i) + { + Assert.Equal(values[i] == null, array.IsNull(i)); + var arrayItem = (Int32Array) array.GetSlicedValues(i); + if (values[i] == null) + { + Assert.Null(arrayItem); + } + else + { + Assert.Equal(values[i], arrayItem.ToArray()); + } + } + } + + [Fact] + public void GetSlicedValuesChecksForOffsetOverflow() + { + var valuesArray = new Int32Array.Builder().Build(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeListArray( + new LargeListType(new Int32Type()), length: 1, + offsetBuffer.Build(), valuesArray, validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetSlicedValues(0)); + } + + private static LargeListArray BuildArray(int?[][] values) + { + var valuesBuilder = new Int32Array.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var listValue in values) + { + if (listValue == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + foreach (var value in listValue) + { + valuesBuilder.Append(value); + } + offset += listValue.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeListArray( + new LargeListType(new Int32Type()), values.Length, + offsetBuffer.Build(), valuesBuilder.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs b/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs new file mode 100644 index 0000000000000..aba97ba338c75 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/LargeStringArrayTests.cs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using System.Collections.Generic; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class LargeStringArrayTests +{ + [Fact] + public void GetStringReturnsCorrectValue() + { + var strings = new string[] + { + "abc", + "defg", + "", + null, + "123", + }; + var array = BuildArray(strings); + + Assert.Equal(array.Length, strings.Length); + for (var i = 0; i < strings.Length; ++i) + { + Assert.Equal(strings[i], array.GetString(i)); + } + } + + [Fact] + public void GetStringChecksForOffsetOverflow() + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + offsetBuffer.Append((long)int.MaxValue + 1); + validityBuffer.Append(true); + + var array = new LargeStringArray( + length: 1, offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + + Assert.Throws(() => array.GetString(0)); + } + + private static LargeStringArray BuildArray(IReadOnlyCollection strings) + { + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + foreach (var value in strings) + { + if (value == null) + { + validityBuffer.Append(false); + offsetBuffer.Append(offset); + } + else + { + var bytes = LargeStringArray.DefaultEncoding.GetBytes(value); + valueBuffer.Append(bytes); + offset += value.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + } + } + + return new LargeStringArray( + strings.Count, offsetBuffer.Build(), valueBuffer.Build(), validityBuffer.Build(), + validityBuffer.UnsetBitCount); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index 83c88265d172b..35fbe7cba68f1 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -63,9 +63,9 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, recordBatches); Assert.Equal(20, table1.RowCount); #if NET5_0_OR_GREATER - Assert.Equal(35, table1.ColumnCount); + Assert.Equal(38, table1.ColumnCount); #else - Assert.Equal(34, table1.ColumnCount); + Assert.Equal(37, table1.ColumnCount); #endif Assert.Equal("ChunkedArray: Length=20, DataType=list", table1.Column(0).Data.ToString()); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 3ea42ee0fbcb7..36969766aeae0 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -49,6 +49,7 @@ void AddField(Field field) { AddField(CreateField(new ListType(Int64Type.Default), i)); AddField(CreateField(new ListViewType(Int64Type.Default), i)); + AddField(CreateField(new LargeListType(Int64Type.Default), i)); AddField(CreateField(BooleanType.Default, i)); AddField(CreateField(UInt8Type.Default, i)); AddField(CreateField(Int8Type.Default, i)); @@ -84,6 +85,8 @@ void AddField(Field field) AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); AddField(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); + AddField(CreateField(new 
LargeBinaryType(), i)); + AddField(CreateField(new LargeStringType(), i)); } Schema schema = builder.Build(); @@ -144,8 +147,10 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -154,6 +159,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -335,6 +341,45 @@ public void Visit(StringViewType type) Array = builder.Build(); } + public void Visit(LargeStringType type) + { + var str = "hello"; + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + long offset = 0; + offsetBuffer.Append(offset); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + offsetBuffer.Append(offset); + validityBuffer.Append(false); + break; + case 1: + valueBuffer.Append(LargeStringArray.DefaultEncoding.GetBytes(str)); + offset += str.Length; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + break; + case 2: + valueBuffer.Append(LargeStringArray.DefaultEncoding.GetBytes(str + str)); + offset += str.Length * 2; + offsetBuffer.Append(offset); + validityBuffer.Append(true); + break; + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeStringArray( + Length, offsetBuffer.Build(), valueBuffer.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); @@ -379,6 +424,37 @@ public void Visit(ListViewType type) Array = builder.Build(); } + public void Visit(LargeListType type) + { + var valueBuilder = new Int64Array.Builder().Reserve(Length * 3 / 2); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0); + + for (var i = 0; i < Length; i++) + { + if (i % 10 == 2) + { + offsetBuffer.Append(valueBuilder.Length); + validityBuffer.Append(false); + } + else + { + var listLength = i % 4; + valueBuilder.AppendRange(Enumerable.Range(i, listLength).Select(x => (long)x)); + offsetBuffer.Append(valueBuilder.Length); + validityBuffer.Append(true); + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? 
validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeListArray( + new LargeListType(new Int64Type()), Length, + offsetBuffer.Build(), valueBuilder.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); @@ -554,6 +630,48 @@ public void Visit(BinaryViewType type) Array = builder.Build(); } + public void Visit(LargeBinaryType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var valueBuffer = new ArrowBuffer.Builder(); + var offsetBuffer = new ArrowBuffer.Builder(); + var validityBuffer = new ArrowBuffer.BitmapBuilder(); + + offsetBuffer.Append(0L); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(false); + break; + case 1: + valueBuffer.Append(shortData); + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(true); + break; + case 2: + valueBuffer.Append(longData); + offsetBuffer.Append(valueBuffer.Length); + validityBuffer.Append(true); + break; + } + } + + var validity = validityBuffer.UnsetBitCount > 0 ? validityBuffer.Build() : ArrowBuffer.Empty; + Array = new LargeBinaryArray( + LargeBinaryType.Default, Length, + offsetBuffer.Build(), valueBuffer.Build(), validity, + validityBuffer.UnsetBitCount); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index b51f3d876f820..47310c905a9ff 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1872,8 +1872,7 @@ def _temp_path(): generate_primitive_case([17, 20], name='primitive'), generate_primitive_case([0, 0, 0], name='primitive_zerolength'), - generate_primitive_large_offsets_case([17, 20]) - .skip_tester('C#'), + generate_primitive_large_offsets_case([17, 20]), generate_null_case([10, 0]), @@ -1906,7 +1905,6 @@ def _temp_path(): generate_recursive_nested_case(), generate_nested_large_offsets_case() - .skip_tester('C#') .skip_tester('JS'), generate_unions_case(), diff --git a/docs/source/status.rst b/docs/source/status.rst index 266381175608a..c232aa280befb 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -62,11 +62,11 @@ Data Types +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | +| Large Binary | ✓ | ✓ | ✓ | ✓ | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | ✓ | +| Large Utf8 | ✓ | ✓ | ✓ | ✓ | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | Binary View | ✓ | | ✓ | | ✓ | | | | | 
+-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ @@ -85,7 +85,7 @@ Data Types +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | List | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ -| Large List | ✓ | ✓ | ✓ | | | ✓ | ✓ | | ✓ | +| Large List | ✓ | ✓ | ✓ | | \(4) | ✓ | ✓ | | ✓ | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ | List View | ✓ | | ✓ | | ✓ | | | | | +-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+ @@ -125,6 +125,8 @@ Notes: * \(1) Casting to/from Float16 in Java is not supported. * \(2) Float16 support in C# is only available when targeting .NET 6+. * \(3) Nested dictionaries not supported +* \(4) C# large array types are provided to help with interoperability with other libraries, + but these do not support buffers larger than 2 GiB and an exception will be raised if trying to import an array that is too large. .. seealso:: The :ref:`format_columnar` and the From e96c6395eee87776c59d83a655d951fe04391d7e Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 19 Jul 2024 09:12:49 -0300 Subject: [PATCH 095/122] GH-43331: [C++] Add missing serde methods to Location (#43332) ### Rationale for this change Completeness and fixing a build issue that only affects MSVC. ### What changes are included in this PR? The fix. ### Are these changes tested? By existing tests. Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/flight/serialization_internal.h | 1 + cpp/src/arrow/flight/types.cc | 8 ++++++++ cpp/src/arrow/flight/types.h | 13 +++++++++++++ 3 files changed, 22 insertions(+) diff --git a/cpp/src/arrow/flight/serialization_internal.h b/cpp/src/arrow/flight/serialization_internal.h index ffde47d43c00e..9922cb61ac004 100644 --- a/cpp/src/arrow/flight/serialization_internal.h +++ b/cpp/src/arrow/flight/serialization_internal.h @@ -94,6 +94,7 @@ Status ToProto(const Result& result, pb::Result* pb_result); Status ToProto(const CancelFlightInfoResult& result, pb::CancelFlightInfoResult* pb_result); Status ToProto(const Criteria& criteria, pb::Criteria* pb_criteria); +Status ToProto(const Location& location, pb::Location* pb_location); Status ToProto(const SchemaResult& result, pb::SchemaResult* pb_result); Status ToProto(const Ticket& ticket, pb::Ticket* pb_ticket); Status ToProto(const BasicAuth& basic_auth, pb::BasicAuth* pb_basic_auth); diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index 170fbfe2393f9..dc1173ebe18db 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -775,6 +775,14 @@ bool FlightEndpoint::Equals(const FlightEndpoint& other) const { return true; } +arrow::Status Location::SerializeToString(std::string* out) const { + return SerializeToProtoString("Location", *this, out); +} + +arrow::Status Location::Deserialize(std::string_view serialized, Location* out) { + return DeserializeProtoString("Location", serialized, out); +} + arrow::Status FlightEndpoint::SerializeToString(std::string* out) const { return SerializeToProtoString("FlightEndpoint", *this, out); } diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index 8061df4ddeb84..bc8e234d977b1 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -555,6 +555,19 @@ struct ARROW_FLIGHT_EXPORT Location 
: public internal::BaseType { bool Equals(const Location& other) const; + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Location* out); + private: friend class FlightClient; friend class FlightServerBase; From ade8b07583f25a0a159c38fa512c00add6815661 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Fri, 19 Jul 2024 10:05:00 -0400 Subject: [PATCH 096/122] MINOR: [Docs][Java] Add Apache Staging url to docs (#43324) ### Rationale for this change * The JAR staging location is not documented, let's fix that. ### What changes are included in this PR? * Add Arrow Java Apache Staging URL to docs. * Remove lingering JDK 8 references. ### Are these changes tested? Yes - screenshot shared below ### Are there any user-facing changes? Yes, improved documentation on the Arrow website. Authored-by: Dane Pitkin Signed-off-by: Dane Pitkin --- docs/source/developers/java/building.rst | 38 +++++++++++++++++++++++- docs/source/java/install.rst | 4 +-- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index 5ee80211584a0..63a7b4369b809 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -335,7 +335,6 @@ Arrow repository, and update the following settings: right click the directory, and select Mark Directory as > Generated Sources Root. There is no need to mark other generated sources directories, as only the ``vector`` module generates sources. -* For JDK 8, disable the ``error-prone`` profile to build the project successfully. * For JDK 11, due to an `IntelliJ bug `__, you must go into Settings > Build, Execution, Deployment > Compiler > Java Compiler and disable @@ -538,3 +537,40 @@ Installing Manually .. _builds@arrow.apache.org: https://lists.apache.org/list.html?builds@arrow.apache.org .. _GitHub Nightly: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars + +Installing Staging Packages +=========================== + +.. warning:: + These packages are not official releases. Use them at your own risk. + +Arrow staging builds are created when a Release Candidate (RC) is being prepared. This allows users to test the RC in their applications before voting on the release. + + +Installing from Apache Staging +-------------------------------- +1. Look up the next version number for the Arrow libraries used. + +2. Add Apache Staging Repository to the Maven/Gradle project. + + .. code-block:: xml + + + 9.0.0 + + ... + + + arrow-apache-staging + https://repository.apache.org/content/repositories/staging + + + ... + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + ... diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index 95307c8c1c5fd..3e01f72a56878 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -29,8 +29,8 @@ Java modules are regularly built and tested on macOS and Linux distributions. Java Compatibility ================== -Java modules are compatible with JDK 8 and above. 
Currently, JDK versions
-11, 17, and 21 are tested in CI. The latest JDK is also tested in CI.
+Java modules are compatible with JDK 11 and above. Currently, JDK versions
+11, 17, 21, and latest are tested in CI.
 
 Note that some JDK internals must be exposed by adding
 ``--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED``
 to the ``java`` command:

From ed67a4283bb069bb31a37346a05a0ddaece9a1be Mon Sep 17 00:00:00 2001
From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com>
Date: Fri, 19 Jul 2024 10:28:07 -0400
Subject: [PATCH 097/122] GH-39789: [Go][Parquet] Close current row group when
 finished writing unbuffered batch (#43326)

### Rationale for this change

Fixes: #39789

The number of bytes reported by `FileWriter.RowGroupTotalBytesWritten()` was consistently lower than the actual bytes in the output buffer if it was read before closing the writer. The issue is that the last column's data page was not flushed until the entire writer was closed, causing its bytes not to be included in the total.

By closing the row group writer before returning from `Write()`, we can ensure all pages are flushed and the `totalBytesWritten` will be accurate.

### What changes are included in this PR?

- Close row group writer before returning from `FileWriter.Write()`
- Test to ensure stats are up to date _before_ closing the writer

### Are these changes tested?

Yes

### Are there any user-facing changes?

`FileWriter.RowGroupTotalBytesWritten()` will be accurate when read while still writing to the file.

* GitHub Issue: #39789

Authored-by: Joel Lubinitsky 
Signed-off-by: Matt Topol 
---
 go/parquet/pqarrow/file_writer.go      | 2 +-
 go/parquet/pqarrow/file_writer_test.go | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go
index 891b757f5eb51..539c544829e3b 100644
--- a/go/parquet/pqarrow/file_writer.go
+++ b/go/parquet/pqarrow/file_writer.go
@@ -246,7 +246,7 @@ func (fw *FileWriter) Write(rec arrow.Record) error {
 		}
 	}
 	fw.colIdx = 0
-	return nil
+	return fw.rgw.Close()
 }
 
 // WriteTable writes an arrow table to the underlying file using chunkSize to determine
diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go
index 25ef3879e7811..5b807389a3eb1 100644
--- a/go/parquet/pqarrow/file_writer_test.go
+++ b/go/parquet/pqarrow/file_writer_test.go
@@ -55,7 +55,11 @@ func TestFileWriterRowGroupNumRows(t *testing.T) {
 	numRows, err := writer.RowGroupNumRows()
 	require.NoError(t, err)
 	assert.Equal(t, 4, numRows)
+
+	// Make sure that row group stats are up-to-date immediately after writing
+	bytesWritten := writer.RowGroupTotalBytesWritten()
 	require.NoError(t, writer.Close())
+	require.Equal(t, bytesWritten, writer.RowGroupTotalBytesWritten())
 }
 
 func TestFileWriterNumRows(t *testing.T) {

From fe5102981b762e386bc882008d587103d58cf66c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= 
Date: Sat, 20 Jul 2024 22:47:40 +0200
Subject: [PATCH 098/122] GH-43284: [Release] Fix version detection timing for
 bump deb package names on post-12-bump-versions.sh script (#43294)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

The script currently fails to bump the deb package names because of an `if` condition that never matches.

### What changes are included in this PR?

Read `current_version` before bumping the versions so that the Debian packages to be updated are matched correctly.
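In essence, the fix captures the version before any bumping happens and compares against that captured value afterwards. A condensed sketch of the shell logic from the diff below (not the full script; the `current_version` and `update_deb_package_names` helpers are assumed to come from the release utilities the script sources):

```bash
# Capture the version before any bumping mutates what current_version reports.
current_version_before_bump="$(current_version)"

# ... version bumps happen here ...

# Compare against the pre-bump version when deciding whether to rename packages.
if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ] && \
   [ "${next_version}" != "${current_version_before_bump}" ]; then
  update_deb_package_names "${version}" "${next_version}"
fi
```

### Are these changes tested?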
It has been tested locally to bump the versions for the debian package names on main for 17.0.0 ### Are there any user-facing changes? No * GitHub Issue: #43284 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/release/post-12-bump-versions-test.rb | 11 +++++++++-- dev/release/post-12-bump-versions.sh | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/dev/release/post-12-bump-versions-test.rb b/dev/release/post-12-bump-versions-test.rb index 2bd14587461cc..f31e1a3122814 100644 --- a/dev/release/post-12-bump-versions-test.rb +++ b/dev/release/post-12-bump-versions-test.rb @@ -358,8 +358,15 @@ def test_version_post_tag def test_deb_package_names omit_on_release_branch unless bump_type.nil? current_commit = git_current_commit - stdout = bump_versions("DEB_PACKAGE_NAMES") - changes = parse_patch(git("log", "-p", "#{current_commit}..")) + stdout = bump_versions("VERSION_POST_TAG", "DEB_PACKAGE_NAMES") + log = git("log", "-p", "#{current_commit}..") + # Remove a commit for VERSION_POST_TAG + if log.scan(/^commit/).size == 1 + log = "" + else + log.gsub!(/\A(commit.*?)^commit .*\z/um, "\\1") + end + changes = parse_patch(log) sampled_changes = changes.collect do |change| first_hunk = change[:hunks][0] first_removed_line = first_hunk.find { |line| line.start_with?("-") } diff --git a/dev/release/post-12-bump-versions.sh b/dev/release/post-12-bump-versions.sh index 422821a66bde5..bf40f4ce5c4ea 100755 --- a/dev/release/post-12-bump-versions.sh +++ b/dev/release/post-12-bump-versions.sh @@ -40,6 +40,7 @@ fi version=$1 next_version=$2 next_version_snapshot="${next_version}-SNAPSHOT" +current_version_before_bump="$(current_version)" case "${version}" in *.0.0) @@ -64,7 +65,7 @@ if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then fi if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ] && \ - [ "${next_version}" != "$(current_version)" ]; then + [ "${next_version}" != "${current_version_before_bump}" ]; then update_deb_package_names "${version}" "${next_version}" fi From c3ebdf500e75ca868f50b7d374fc8ce2237756b8 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 20 Jul 2024 20:07:48 -0500 Subject: [PATCH 099/122] MINOR: [R] add back `dplyr::` to `left_join` calls (#43348) Necessary for a clean check. These were inadvertently taken out in #41576 and don't actually change any code, just appeases the static checker that CRAN runs. Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- r/R/dplyr-mutate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index fcb1cedbbb168..03659f5735708 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -77,12 +77,12 @@ mutate.arrow_dplyr_query <- function(.data, agg_query$aggregations <- mask$.aggregations agg_query <- collapse.arrow_dplyr_query(agg_query) if (length(grv)) { - out <- left_join(out, agg_query, by = grv) + out <- dplyr::left_join(out, agg_query, by = grv) } else { # If there are no group_by vars, add a scalar column to both and join on that agg_query$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) out$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) - out <- left_join(out, agg_query, by = "..tempjoin") + out <- dplyr::left_join(out, agg_query, by = "..tempjoin") } } From 406c66b1ec8f19d61d20af31432674ea81f88c48 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 21 Jul 2024 19:55:17 +0100 Subject: [PATCH 100/122] GH-43319: [R][Docs] Update packaging checklist (#43345) ### What changes are included in this PR? 
Remove outdated info and minor updates * GitHub Issue: #43319 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- r/PACKAGING.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/r/PACKAGING.md b/r/PACKAGING.md index abe86a62fdc4f..7f6f80745bd96 100644 --- a/r/PACKAGING.md +++ b/r/PACKAGING.md @@ -32,7 +32,7 @@ For a high-level overview of the release process see the - [ ] Ensure the contents of the README are accurate and up to date. - [ ] Run `urlchecker::url_check()` on the R directory at the release candidate. commit. Ignore any errors with badges as they will be removed in the CRAN release branch. -- [ ] [Polish NEWS](https://style.tidyverse.org/news.html#news-release) but do **not** update version numbers (this is done automatically later). You can find commits by, for example, `git log --oneline aa057d0..HEAD | grep "\[R\]"` +- [ ] [Polish NEWS](https://style.tidyverse.org/news.html#news-release) but do **not** update version numbers (this is done automatically later). You can find commits by, for example, `git log --oneline ..HEAD | grep "\[R\]"` - [ ] Run preliminary reverse dependency checks using `archery docker run r-revdepcheck`. - [ ] For major releases, prepare tweet thread highlighting new features. @@ -72,15 +72,10 @@ Wait for the release candidate to be cut: - [ ] Run `Rscript tools/update-checksums.R ` to download the checksums for the pre-compiled binaries from the ASF artifactory into the tools directory. - [ ] Regenerate arrow_X.X.X.tar.gz (i.e., `make build`) -Ensure linux binary packages are available: -- [ ] Ensure linux binaries are available in the artifactory: - https://apache.jfrog.io/ui/repos/tree/General/arrow/r - ## Check binary Arrow C++ distributions specific to the R package - [ ] Upload the .tar.gz to [win-builder](https://win-builder.r-project.org/upload.aspx) (r-devel only) - and confirm (with Nic, who will automatically receive an email about the results) that the check is clean. - This step cannot be completed before Jeroen has put the binaries in the MinGW repository, i.e. [here](https://ftp.opencpu.org/rtools/ucrt64/), [here](https://ftp.opencpu.org/rtools/mingw64/), and [here](https://ftp.opencpu.org/rtools/mingw32/). + and confirm (with Jon, who will automatically receive an email about the results) that the check is clean. - [ ] Upload the .tar.gz to [MacBuilder](https://mac.r-project.org/macbuilder/submit.html) and confirm that the check is clean - [ ] Check `install.packages("arrow_X.X.X.tar.gz")` on Ubuntu and ensure that the @@ -105,3 +100,4 @@ Wait for CRAN... [CRAN package page](https://cran.r-project.org/package=arrow) to reflect the new version - [ ] Tweet! + - Use Bryce's [script](https://gist.githubusercontent.com/amoeba/4e26c064d1a0d0227cd8c2260cf0072a/raw/bc0d983152bdde4820de9074d4caee9986624bc5/new_contributors.R) for contributor calculation. From 79910fed17e0a7f17c6bd5f369a43ac9ae1a376d Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sun, 21 Jul 2024 19:55:47 +0100 Subject: [PATCH 101/122] MINOR: [R] Update NEWS.md for 17.0.0 (#43344) ### What changes are included in this PR? 
Minor formats and updates of R News for 17.0.0 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- r/NEWS.md | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 6159f3863c375..0e6e4634a0af8 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -21,12 +21,31 @@ # arrow 17.0.0 -* R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) +## New features + +* R functions that users write that use functions that Arrow supports in dataset + queries now can be used in queries too. Previously, only functions that used + arithmetic operators worked. + For example, `time_hours <- function(mins) mins / 60` worked, + but `time_hours_rounded <- function(mins) round(mins / 60)` did not; + now both work. These are automatic translations rather than true user-defined + functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) * `mutate()` expressions can now include aggregations, such as `x - mean(x)`. (#41350) -* `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. -* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) -* R metadata, stored in the Arrow schema to support round-tripping data between R and Arrow/Parquet, is now serialized and deserialized more strictly. This makes it safer to load data from files from unknown sources into R data.frames. (#41969) -* The minimum version of the Arrow C++ library the Arrow R package can be built with has been bumped to 15.0.0 (#42241) +* `summarize()` supports more complex expressions, and correctly handles cases + where column names are reused in expressions. +* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. + This argument controls whether `NA` values are considered equal when joining. (#41358) +* R metadata, stored in the Arrow schema to support round-tripping data between + R and Arrow/Parquet, is now serialized and deserialized more strictly. + This makes it safer to load data from files from unknown sources into R data.frames. (#41969) + +## Minor improvements and fixes +* Turn on the S3 and ZSTD features by default for macOS. (#42210) +* Fix bindings in Math group generics. (#43162) +* Fix a bug in our implementation of `pull` on grouped datasets, it now + returns the expected column. (#43172) +* The minimum version of the Arrow C++ library the Arrow R package can be built + with has been bumped to 15.0.0 (#42241) # arrow 16.1.0 From 05ab846b4f96b2ff70e2a5918cb713734b7c063b Mon Sep 17 00:00:00 2001 From: Benson Muite Date: Sun, 21 Jul 2024 23:32:48 +0300 Subject: [PATCH 102/122] GH-43346: [Docs][Format] Update broken links (#43347) ### Rationale for this change https://github.com/apache/arrow/issues/43346 ### What changes are included in this PR? Use link from internet archive and SIMD introduction from 2021 ### Are these changes tested? Links are functional ### Are there any user-facing changes? 
This is a minor documentation update.

* GitHub Issue: #43346

Authored-by: Benson Muite 
Signed-off-by: Sutou Kouhei 
---
 docs/source/format/Columnar.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst
index 7ae0c2b4bdbd8..c5f822f41643f 100644
--- a/docs/source/format/Columnar.rst
+++ b/docs/source/format/Columnar.rst
@@ -1656,8 +1656,8 @@ the Arrow spec.
 .. _Message.fbs: https://github.com/apache/arrow/blob/main/format/Message.fbs
 .. _File.fbs: https://github.com/apache/arrow/blob/main/format/File.fbs
 .. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
-.. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
+.. _Intel performance guide: https://web.archive.org/web/20151101074635/https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
 .. _Endianness: https://en.wikipedia.org/wiki/Endianness
-.. _SIMD: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-introduction-to-the-simd-data-layout-templates
+.. _SIMD: https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/simd-data-layout-templates.html
 .. _Parquet: https://parquet.apache.org/docs/
 .. _UmbraDB: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf

From b763e226d098a369fe02e11cc225c67dd860991a Mon Sep 17 00:00:00 2001
From: mwish 
Date: Mon, 22 Jul 2024 18:54:46 +0800
Subject: [PATCH 103/122] GH-43221: [C++][Parquet] Refactor
 parquet::encryption::AesEncryptor to use unique_ptr (#43222)

### Rationale for this change

See https://github.com/apache/arrow/issues/43221

### What changes are included in this PR?

Change the raw pointers returned by `AesEncryptor::Make` to `std::unique_ptr`, so that ownership no longer has to be tracked through a separate `all_encryptors` vector.
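A rough before/after sketch of a call site (illustrative only; the real call sites live in `internal_file_encryptor.cc` and `metadata.cc`, shown in the diff below):

```cpp
// Before: Make() returned a raw pointer and appended it to a caller-supplied
// vector so that it could be deleted later.
//   AesEncryptor* encryptor =
//       AesEncryptor::Make(alg_id, key_len, /*metadata=*/true, &all_encryptors_);

// After: Make() returns an owning pointer, so no shared bookkeeping is needed.
std::unique_ptr<encryption::AesEncryptor> encryptor =
    encryption::AesEncryptor::Make(ParquetCipher::AES_GCM_V1, /*key_len=*/16,
                                   /*metadata=*/true);
```

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?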
Maybe change user interface * GitHub Issue: #43221 Authored-by: mwish Signed-off-by: Antoine Pitrou --- .../parquet/encryption/encryption_internal.cc | 15 ++++++--------- .../parquet/encryption/encryption_internal.h | 9 ++++----- .../encryption/encryption_internal_nossl.cc | 12 +++++++----- .../encryption/internal_file_decryptor.cc | 8 ++++---- .../encryption/internal_file_encryptor.cc | 19 ++++++++++++------- .../encryption/internal_file_encryptor.h | 4 +--- cpp/src/parquet/metadata.cc | 7 +++---- 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 6168dd2a9bd61..99d1707f4a8d4 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -469,23 +469,20 @@ AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, int } } -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - std::vector* all_encryptors) { - return Make(alg_id, key_len, metadata, true /*write_length*/, all_encryptors); +std::unique_ptr AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata) { + return Make(alg_id, key_len, metadata, true /*write_length*/); } -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - bool write_length, - std::vector* all_encryptors) { +std::unique_ptr AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata, bool write_length) { if (ParquetCipher::AES_GCM_V1 != alg_id && ParquetCipher::AES_GCM_CTR_V1 != alg_id) { std::stringstream ss; ss << "Crypto algorithm " << alg_id << " is not supported"; throw ParquetException(ss.str()); } - AesEncryptor* encryptor = new AesEncryptor(alg_id, key_len, metadata, write_length); - if (all_encryptors != nullptr) all_encryptors->push_back(encryptor); - return encryptor; + return std::make_unique(alg_id, key_len, metadata, write_length); } AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadata, diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index a9a17f1ab98e3..c874b137ad1ad 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -52,12 +52,11 @@ class PARQUET_EXPORT AesEncryptor { explicit AesEncryptor(ParquetCipher::type alg_id, int key_len, bool metadata, bool write_length = true); - static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata, - std::vector* all_encryptors); + static std::unique_ptr Make(ParquetCipher::type alg_id, int key_len, + bool metadata); - static AesEncryptor* Make(ParquetCipher::type alg_id, int key_len, bool metadata, - bool write_length, - std::vector* all_encryptors); + static std::unique_ptr Make(ParquetCipher::type alg_id, int key_len, + bool metadata, bool write_length); ~AesEncryptor(); diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index 2f6cdc8200016..2cce83915d7e5 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -72,14 +72,15 @@ void AesDecryptor::WipeOut() { ThrowOpenSSLRequiredException(); } AesDecryptor::~AesDecryptor() {} -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - std::vector* all_encryptors) { +std::unique_ptr 
AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata) { + ThrowOpenSSLRequiredException(); return NULLPTR; } -AesEncryptor* AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, bool metadata, - bool write_length, - std::vector* all_encryptors) { +std::unique_ptr AesEncryptor::Make(ParquetCipher::type alg_id, int key_len, + bool metadata, bool write_length) { + ThrowOpenSSLRequiredException(); return NULLPTR; } @@ -91,6 +92,7 @@ AesDecryptor::AesDecryptor(ParquetCipher::type alg_id, int key_len, bool metadat std::shared_ptr AesDecryptor::Make( ParquetCipher::type alg_id, int key_len, bool metadata, std::vector>* all_decryptors) { + ThrowOpenSSLRequiredException(); return NULLPTR; } diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index a900a4d2eb094..fae5ce1f7a809 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -27,7 +27,7 @@ namespace parquet { Decryptor::Decryptor(std::shared_ptr aes_decryptor, const std::string& key, const std::string& file_aad, const std::string& aad, ::arrow::MemoryPool* pool) - : aes_decryptor_(aes_decryptor), + : aes_decryptor_(std::move(aes_decryptor)), key_(key), file_aad_(file_aad), aad_(aad), @@ -156,9 +156,9 @@ std::shared_ptr InternalFileDecryptor::GetFooterDecryptor( } footer_metadata_decryptor_ = std::make_shared( - aes_metadata_decryptor, footer_key, file_aad_, aad, pool_); - footer_data_decryptor_ = - std::make_shared(aes_data_decryptor, footer_key, file_aad_, aad, pool_); + std::move(aes_metadata_decryptor), footer_key, file_aad_, aad, pool_); + footer_data_decryptor_ = std::make_shared(std::move(aes_data_decryptor), + footer_key, file_aad_, aad, pool_); if (metadata) return footer_metadata_decryptor_; return footer_data_decryptor_; diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index a423cc678cccb..285c2100be813 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -53,8 +53,15 @@ InternalFileEncryptor::InternalFileEncryptor(FileEncryptionProperties* propertie void InternalFileEncryptor::WipeOutEncryptionKeys() { properties_->WipeOutEncryptionKeys(); - for (auto const& i : all_encryptors_) { - i->WipeOut(); + for (auto const& i : meta_encryptor_) { + if (i != nullptr) { + i->WipeOut(); + } + } + for (auto const& i : data_encryptor_) { + if (i != nullptr) { + i->WipeOut(); + } } } @@ -136,7 +143,7 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return encryptor; } -int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) { +int InternalFileEncryptor::MapKeyLenToEncryptorArrayIndex(int key_len) const { if (key_len == 16) return 0; else if (key_len == 24) @@ -151,8 +158,7 @@ encryption::AesEncryptor* InternalFileEncryptor::GetMetaAesEncryptor( int key_len = static_cast(key_size); int index = MapKeyLenToEncryptorArrayIndex(key_len); if (meta_encryptor_[index] == nullptr) { - meta_encryptor_[index].reset( - encryption::AesEncryptor::Make(algorithm, key_len, true, &all_encryptors_)); + meta_encryptor_[index] = encryption::AesEncryptor::Make(algorithm, key_len, true); } return meta_encryptor_[index].get(); } @@ -162,8 +168,7 @@ encryption::AesEncryptor* InternalFileEncryptor::GetDataAesEncryptor( int key_len = static_cast(key_size); int index = MapKeyLenToEncryptorArrayIndex(key_len); if 
(data_encryptor_[index] == nullptr) { - data_encryptor_[index].reset( - encryption::AesEncryptor::Make(algorithm, key_len, false, &all_encryptors_)); + data_encryptor_[index] = encryption::AesEncryptor::Make(algorithm, key_len, false); } return data_encryptor_[index].get(); } diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index 41ffc6fd51943..91b6e9fe5aa2f 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -88,8 +88,6 @@ class InternalFileEncryptor { std::shared_ptr footer_signing_encryptor_; std::shared_ptr footer_encryptor_; - std::vector all_encryptors_; - // Key must be 16, 24 or 32 bytes in length. Thus there could be up to three // types of meta_encryptors and data_encryptors. std::unique_ptr meta_encryptor_[3]; @@ -105,7 +103,7 @@ class InternalFileEncryptor { encryption::AesEncryptor* GetDataAesEncryptor(ParquetCipher::type algorithm, size_t key_len); - int MapKeyLenToEncryptorArrayIndex(int key_len); + int MapKeyLenToEncryptorArrayIndex(int key_len) const; }; } // namespace parquet diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 4ea3b05340d71..ee8391818962c 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -651,9 +651,9 @@ class FileMetaData::FileMetaDataImpl { std::string key = file_decryptor_->GetFooterKey(); std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); - auto aes_encryptor = encryption::AesEncryptor::Make( - file_decryptor_->algorithm(), static_cast(key.size()), true, - false /*write_length*/, nullptr); + auto aes_encryptor = encryption::AesEncryptor::Make(file_decryptor_->algorithm(), + static_cast(key.size()), + true, false /*write_length*/); std::shared_ptr encrypted_buffer = AllocateBuffer( file_decryptor_->pool(), aes_encryptor->CiphertextLength(serialized_len)); @@ -662,7 +662,6 @@ class FileMetaData::FileMetaDataImpl { encrypted_buffer->mutable_span_as()); // Delete AES encryptor object. It was created only to verify the footer signature. aes_encryptor->WipeOut(); - delete aes_encryptor; return 0 == memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength, tag, encryption::kGcmTagLength); From 9e6acbe08a0ff5b569763d377aecc824362dd593 Mon Sep 17 00:00:00 2001 From: Oliver Layer Date: Mon, 22 Jul 2024 13:24:36 +0200 Subject: [PATCH 104/122] GH-40557: [C++] Use `PutObject` request for S3 in OutputStream when only uploading small data (#41564) ### Rationale for this change See #40557. The previous implementation would always issue multi part uploads which come with 3x RTT to S3 instead of just 1x RTT with a `PutObject` request. ### What changes are included in this PR? Implement logic in the S3 `OutputStream` to use a `PutObject` request if data is below a certain threshold (5 MB) and the output stream is closed. If more data is written, a multi part upload is triggered. Note: Previously, opening the output stream was already expensive because the `CreateMultipartUpload` request was triggered then. With this change opening the output stream becomes cheap, as we rather wait until some data is written to decide which upload method to use. This required some more state-keeping in the output stream class. ### Are these changes tested? No new tests were added, as there are already tests for very small writes and very large writes, which will trigger both ways of uploading. 
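The two ways hinge on a single predicate in `ObjectOutputStream` (quoted from the new code in the diff below; `pos_` is the number of bytes written to the stream so far):

```cpp
bool ShouldBeMultipartUpload() const {
  return pos_ > kMultiPartUploadThresholdSize || !allow_delayed_open_;
}
```

When this returns false at close time, the buffered data is sent with a single `PutObject` request (`UploadUsingSingleRequest`); otherwise the stream goes through `CreateMultipartUpload`/`UploadPart`/`CompleteMultipartUpload`.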
Everything should therefore be covered by existing tests.

### Are there any user-facing changes?

- Previously, we would fail when opening the output stream if the bucket didn't exist. We inferred that from the `CreateMultipartUpload` request, which we no longer send when opening the stream. Instead, we now fail at close time, or at write time (once more than 5 MB have accumulated). Replicating the old behavior is not possible without sending another request, which would defeat the purpose of this performance optimization. I hope this is fine.
* GitHub Issue: #40557

Lead-authored-by: Oliver Layer 
Co-authored-by: Antoine Pitrou 
Signed-off-by: Antoine Pitrou 
---
 cpp/src/arrow/filesystem/s3fs.cc      | 373 ++++++++++++++++++++------
 cpp/src/arrow/filesystem/s3fs.h       |  10 +
 cpp/src/arrow/filesystem/s3fs_test.cc | 186 +++++++++----
 3 files changed, 440 insertions(+), 129 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 99cee19ed1e78..fd5b2e5be2a3a 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -74,6 +75,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 // AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
@@ -1335,7 +1337,7 @@ struct ObjectMetadataSetter {
   static std::unordered_map GetSetters() {
     return {{"ACL", CannedACLSetter()},
             {"Cache-Control", StringSetter(&ObjectRequest::SetCacheControl)},
-            {"Content-Type", StringSetter(&ObjectRequest::SetContentType)},
+            {"Content-Type", ContentTypeSetter()},
             {"Content-Language", StringSetter(&ObjectRequest::SetContentLanguage)},
             {"Expires", DateTimeSetter(&ObjectRequest::SetExpires)}};
   }
@@ -1365,6 +1367,16 @@ struct ObjectMetadataSetter {
     };
   }
 
+  /** We need a special setter here and can not use `StringSetter` because for e.g. the
+   * `PutObjectRequest`, the setter is located in the base class (instead of the concrete
+   * class). */
+  static Setter ContentTypeSetter() {
+    return [](const std::string& str, ObjectRequest* req) {
+      req->SetContentType(str);
+      return Status::OK();
+    };
+  }
+
   static Result ParseACL(const std::string& v) {
     if (v.empty()) {
       return S3Model::ObjectCannedACL::NOT_SET;
@@ -1583,6 +1595,15 @@ class ObjectInputFile final : public io::RandomAccessFile {
 // (for rational, see: https://github.com/apache/arrow/issues/34363)
 static constexpr int64_t kPartUploadSize = 10 * 1024 * 1024;
 
+// Above this threshold, use a multi-part upload instead of a single request upload. Only
+// relevant if early sanitization of writing to the bucket is disabled (see
+// `allow_delayed_open`).
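+// The threshold is kept strictly below kPartUploadSize (the static_assert below
+// enforces this), so a stream that has accumulated a full part's worth of data
+// always takes the multi-part path.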
+static constexpr int64_t kMultiPartUploadThresholdSize = kPartUploadSize - 1; + +static_assert(kMultiPartUploadThresholdSize < kPartUploadSize, + "Multi part upload threshold size must be stricly less than the actual " + "multi part upload part size."); + // An OutputStream that writes to a S3 object class ObjectOutputStream final : public io::OutputStream { protected: @@ -1598,7 +1619,8 @@ class ObjectOutputStream final : public io::OutputStream { path_(path), metadata_(metadata), default_metadata_(options.default_metadata), - background_writes_(options.background_writes) {} + background_writes_(options.background_writes), + allow_delayed_open_(options.allow_delayed_open) {} ~ObjectOutputStream() override { // For compliance with the rest of the IO stack, Close rather than Abort, @@ -1606,29 +1628,47 @@ class ObjectOutputStream final : public io::OutputStream { io::internal::CloseFromDestructor(this); } + template + Status SetMetadataInRequest(ObjectRequest* request) { + std::shared_ptr metadata; + + if (metadata_ && metadata_->size() != 0) { + metadata = metadata_; + } else if (default_metadata_ && default_metadata_->size() != 0) { + metadata = default_metadata_; + } + + bool is_content_type_set{false}; + if (metadata) { + RETURN_NOT_OK(SetObjectMetadata(metadata, request)); + + is_content_type_set = metadata->Contains("Content-Type"); + } + + if (!is_content_type_set) { + // If we do not set anything then the SDK will default to application/xml + // which confuses some tools (https://github.com/apache/arrow/issues/11934) + // So we instead default to application/octet-stream which is less misleading + request->SetContentType("application/octet-stream"); + } + + return Status::OK(); + } + std::shared_ptr Self() { return std::dynamic_pointer_cast(shared_from_this()); } - Status Init() { + Status CreateMultipartUpload() { + DCHECK(ShouldBeMultipartUpload()); + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); // Initiate the multi-part upload S3Model::CreateMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - if (metadata_ && metadata_->size() != 0) { - RETURN_NOT_OK(SetObjectMetadata(metadata_, &req)); - } else if (default_metadata_ && default_metadata_->size() != 0) { - RETURN_NOT_OK(SetObjectMetadata(default_metadata_, &req)); - } - - // If we do not set anything then the SDK will default to application/xml - // which confuses some tools (https://github.com/apache/arrow/issues/11934) - // So we instead default to application/octet-stream which is less misleading - if (!req.ContentTypeHasBeenSet()) { - req.SetContentType("application/octet-stream"); - } + RETURN_NOT_OK(SetMetadataInRequest(&req)); auto outcome = client_lock.Move()->CreateMultipartUpload(req); if (!outcome.IsSuccess()) { @@ -1637,7 +1677,19 @@ class ObjectOutputStream final : public io::OutputStream { path_.key, "' in bucket '", path_.bucket, "': "), "CreateMultipartUpload", outcome.GetError()); } - upload_id_ = outcome.GetResult().GetUploadId(); + multipart_upload_id_ = outcome.GetResult().GetUploadId(); + + return Status::OK(); + } + + Status Init() { + // If we are allowed to do delayed I/O, we can use a single request to upload the + // data. If not, we use a multi-part upload and initiate it here to + // sanitize that writing to the bucket is possible. 
+ if (!allow_delayed_open_) { + RETURN_NOT_OK(CreateMultipartUpload()); + } + upload_state_ = std::make_shared(); closed_ = false; return Status::OK(); @@ -1648,42 +1700,62 @@ class ObjectOutputStream final : public io::OutputStream { return Status::OK(); } - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); + if (IsMultipartCreated()) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); - S3Model::AbortMultipartUploadRequest req; - req.SetBucket(ToAwsString(path_.bucket)); - req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); + S3Model::AbortMultipartUploadRequest req; + req.SetBucket(ToAwsString(path_.bucket)); + req.SetKey(ToAwsString(path_.key)); + req.SetUploadId(multipart_upload_id_); - auto outcome = client_lock.Move()->AbortMultipartUpload(req); - if (!outcome.IsSuccess()) { - return ErrorToStatus( - std::forward_as_tuple("When aborting multiple part upload for key '", path_.key, - "' in bucket '", path_.bucket, "': "), - "AbortMultipartUpload", outcome.GetError()); + auto outcome = client_lock.Move()->AbortMultipartUpload(req); + if (!outcome.IsSuccess()) { + return ErrorToStatus( + std::forward_as_tuple("When aborting multiple part upload for key '", + path_.key, "' in bucket '", path_.bucket, "': "), + "AbortMultipartUpload", outcome.GetError()); + } } + current_part_.reset(); holder_ = nullptr; closed_ = true; + return Status::OK(); } // OutputStream interface + bool ShouldBeMultipartUpload() const { + return pos_ > kMultiPartUploadThresholdSize || !allow_delayed_open_; + } + + bool IsMultipartCreated() const { return !multipart_upload_id_.empty(); } + Status EnsureReadyToFlushFromClose() { - if (current_part_) { - // Upload last part - RETURN_NOT_OK(CommitCurrentPart()); - } + if (ShouldBeMultipartUpload()) { + if (current_part_) { + // Upload last part + RETURN_NOT_OK(CommitCurrentPart()); + } - // S3 mandates at least one part, upload an empty one if necessary - if (part_number_ == 1) { - RETURN_NOT_OK(UploadPart("", 0)); + // S3 mandates at least one part, upload an empty one if necessary + if (part_number_ == 1) { + RETURN_NOT_OK(UploadPart("", 0)); + } + } else { + RETURN_NOT_OK(UploadUsingSingleRequest()); } return Status::OK(); } + Status CleanupAfterClose() { + holder_ = nullptr; + closed_ = true; + return Status::OK(); + } + Status FinishPartUploadAfterFlush() { ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); @@ -1697,7 +1769,7 @@ class ObjectOutputStream final : public io::OutputStream { S3Model::CompleteMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); + req.SetUploadId(multipart_upload_id_); req.SetMultipartUpload(std::move(completed_upload)); auto outcome = @@ -1709,8 +1781,6 @@ class ObjectOutputStream final : public io::OutputStream { "CompleteMultipartUpload", outcome.GetError()); } - holder_ = nullptr; - closed_ = true; return Status::OK(); } @@ -1720,7 +1790,12 @@ class ObjectOutputStream final : public io::OutputStream { RETURN_NOT_OK(EnsureReadyToFlushFromClose()); RETURN_NOT_OK(Flush()); - return FinishPartUploadAfterFlush(); + + if (IsMultipartCreated()) { + RETURN_NOT_OK(FinishPartUploadAfterFlush()); + } + + return CleanupAfterClose(); } Future<> CloseAsync() override { @@ -1729,8 +1804,12 @@ class ObjectOutputStream final : public io::OutputStream { RETURN_NOT_OK(EnsureReadyToFlushFromClose()); // Wait for in-progress uploads to finish (if async writes are enabled) - return FlushAsync().Then( - [self = Self()]() { return 
self->FinishPartUploadAfterFlush(); }); + return FlushAsync().Then([self = Self()]() { + if (self->IsMultipartCreated()) { + RETURN_NOT_OK(self->FinishPartUploadAfterFlush()); + } + return self->CleanupAfterClose(); + }); } bool closed() const override { return closed_; } @@ -1776,7 +1855,8 @@ class ObjectOutputStream final : public io::OutputStream { return Status::OK(); } - // Upload current buffer + // Upload current buffer. We're only reaching this point if we have accumulated + // enough data to upload. RETURN_NOT_OK(CommitCurrentPart()); } @@ -1810,40 +1890,73 @@ class ObjectOutputStream final : public io::OutputStream { } // Wait for background writes to finish std::unique_lock lock(upload_state_->mutex); - return upload_state_->pending_parts_completed; + return upload_state_->pending_uploads_completed; } // Upload-related helpers Status CommitCurrentPart() { + if (!IsMultipartCreated()) { + RETURN_NOT_OK(CreateMultipartUpload()); + } + ARROW_ASSIGN_OR_RAISE(auto buf, current_part_->Finish()); current_part_.reset(); current_part_size_ = 0; return UploadPart(buf); } - Status UploadPart(std::shared_ptr buffer) { - return UploadPart(buffer->data(), buffer->size(), buffer); + Status UploadUsingSingleRequest() { + std::shared_ptr buf; + if (current_part_ == nullptr) { + // In case the stream is closed directly after it has been opened without writing + // anything, we'll have to create an empty buffer. + buf = std::make_shared(""); + } else { + ARROW_ASSIGN_OR_RAISE(buf, current_part_->Finish()); + } + + current_part_.reset(); + current_part_size_ = 0; + return UploadUsingSingleRequest(buf); } - Status UploadPart(const void* data, int64_t nbytes, - std::shared_ptr owned_buffer = nullptr) { - S3Model::UploadPartRequest req; + template + using UploadResultCallbackFunction = + std::function, + int32_t part_number, OutcomeType outcome)>; + + static Result TriggerUploadRequest( + const Aws::S3::Model::PutObjectRequest& request, + const std::shared_ptr& holder) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); + return client_lock.Move()->PutObject(request); + } + + static Result TriggerUploadRequest( + const Aws::S3::Model::UploadPartRequest& request, + const std::shared_ptr& holder) { + ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); + return client_lock.Move()->UploadPart(request); + } + + template + Status Upload( + RequestType&& req, + UploadResultCallbackFunction sync_result_callback, + UploadResultCallbackFunction async_result_callback, + const void* data, int64_t nbytes, std::shared_ptr owned_buffer = nullptr) { req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - req.SetUploadId(upload_id_); - req.SetPartNumber(part_number_); + req.SetBody(std::make_shared(data, nbytes)); req.SetContentLength(nbytes); if (!background_writes_) { req.SetBody(std::make_shared(data, nbytes)); - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); - auto outcome = client_lock.Move()->UploadPart(req); - if (!outcome.IsSuccess()) { - return UploadPartError(req, outcome); - } else { - AddCompletedPart(upload_state_, part_number_, outcome.GetResult()); - } + + ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_)); + + RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_, outcome)); } else { // If the data isn't owned, make an immutable copy for the lifetime of the closure if (owned_buffer == nullptr) { @@ -1858,19 +1971,18 @@ class ObjectOutputStream final : public io::OutputStream { { std::unique_lock 
lock(upload_state_->mutex); - if (upload_state_->parts_in_progress++ == 0) { - upload_state_->pending_parts_completed = Future<>::Make(); + if (upload_state_->uploads_in_progress++ == 0) { + upload_state_->pending_uploads_completed = Future<>::Make(); } } // The closure keeps the buffer and the upload state alive auto deferred = [owned_buffer, holder = holder_, req = std::move(req), - state = upload_state_, + state = upload_state_, async_result_callback, part_number = part_number_]() mutable -> Status { - ARROW_ASSIGN_OR_RAISE(auto client_lock, holder->Lock()); - auto outcome = client_lock.Move()->UploadPart(req); - HandleUploadOutcome(state, part_number, req, outcome); - return Status::OK(); + ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder)); + + return async_result_callback(req, state, part_number, outcome); }; RETURN_NOT_OK(SubmitIO(io_context_, std::move(deferred))); } @@ -1880,9 +1992,118 @@ class ObjectOutputStream final : public io::OutputStream { return Status::OK(); } - static void HandleUploadOutcome(const std::shared_ptr& state, - int part_number, const S3Model::UploadPartRequest& req, - const Result& result) { + static Status UploadUsingSingleRequestError( + const Aws::S3::Model::PutObjectRequest& request, + const Aws::S3::Model::PutObjectOutcome& outcome) { + return ErrorToStatus( + std::forward_as_tuple("When uploading object with key '", request.GetKey(), + "' in bucket '", request.GetBucket(), "': "), + "PutObject", outcome.GetError()); + } + + Status UploadUsingSingleRequest(std::shared_ptr buffer) { + return UploadUsingSingleRequest(buffer->data(), buffer->size(), buffer); + } + + Status UploadUsingSingleRequest(const void* data, int64_t nbytes, + std::shared_ptr owned_buffer = nullptr) { + auto sync_result_callback = [](const Aws::S3::Model::PutObjectRequest& request, + std::shared_ptr state, + int32_t part_number, + Aws::S3::Model::PutObjectOutcome outcome) { + if (!outcome.IsSuccess()) { + return UploadUsingSingleRequestError(request, outcome); + } + return Status::OK(); + }; + + auto async_result_callback = [](const Aws::S3::Model::PutObjectRequest& request, + std::shared_ptr state, + int32_t part_number, + Aws::S3::Model::PutObjectOutcome outcome) { + HandleUploadUsingSingleRequestOutcome(state, request, outcome.GetResult()); + return Status::OK(); + }; + + Aws::S3::Model::PutObjectRequest req{}; + RETURN_NOT_OK(SetMetadataInRequest(&req)); + + return Upload( + std::move(req), std::move(sync_result_callback), std::move(async_result_callback), + data, nbytes, std::move(owned_buffer)); + } + + Status UploadPart(std::shared_ptr buffer) { + return UploadPart(buffer->data(), buffer->size(), buffer); + } + + static Status UploadPartError(const Aws::S3::Model::UploadPartRequest& request, + const Aws::S3::Model::UploadPartOutcome& outcome) { + return ErrorToStatus( + std::forward_as_tuple("When uploading part for key '", request.GetKey(), + "' in bucket '", request.GetBucket(), "': "), + "UploadPart", outcome.GetError()); + } + + Status UploadPart(const void* data, int64_t nbytes, + std::shared_ptr owned_buffer = nullptr) { + if (!IsMultipartCreated()) { + RETURN_NOT_OK(CreateMultipartUpload()); + } + + Aws::S3::Model::UploadPartRequest req{}; + req.SetPartNumber(part_number_); + req.SetUploadId(multipart_upload_id_); + + auto sync_result_callback = [](const Aws::S3::Model::UploadPartRequest& request, + std::shared_ptr state, + int32_t part_number, + Aws::S3::Model::UploadPartOutcome outcome) { + if (!outcome.IsSuccess()) { + return 
UploadPartError(request, outcome); + } else { + AddCompletedPart(state, part_number, outcome.GetResult()); + } + + return Status::OK(); + }; + + auto async_result_callback = [](const Aws::S3::Model::UploadPartRequest& request, + std::shared_ptr state, + int32_t part_number, + Aws::S3::Model::UploadPartOutcome outcome) { + HandleUploadPartOutcome(state, part_number, request, outcome.GetResult()); + return Status::OK(); + }; + + return Upload( + std::move(req), std::move(sync_result_callback), std::move(async_result_callback), + data, nbytes, std::move(owned_buffer)); + } + + static void HandleUploadUsingSingleRequestOutcome( + const std::shared_ptr& state, const S3Model::PutObjectRequest& req, + const Result& result) { + std::unique_lock lock(state->mutex); + if (!result.ok()) { + state->status &= result.status(); + } else { + const auto& outcome = *result; + if (!outcome.IsSuccess()) { + state->status &= UploadUsingSingleRequestError(req, outcome); + } + } + // GH-41862: avoid potential deadlock if the Future's callback is called + // with the mutex taken. + auto fut = state->pending_uploads_completed; + lock.unlock(); + fut.MarkFinished(state->status); + } + + static void HandleUploadPartOutcome(const std::shared_ptr& state, + int part_number, + const S3Model::UploadPartRequest& req, + const Result& result) { std::unique_lock lock(state->mutex); if (!result.ok()) { state->status &= result.status(); @@ -1895,10 +2116,10 @@ class ObjectOutputStream final : public io::OutputStream { } } // Notify completion - if (--state->parts_in_progress == 0) { + if (--state->uploads_in_progress == 0) { // GH-41862: avoid potential deadlock if the Future's callback is called // with the mutex taken. - auto fut = state->pending_parts_completed; + auto fut = state->pending_uploads_completed; lock.unlock(); // State could be mutated concurrently if another thread writes to the // stream, but in this case the Flush() call is only advisory anyway. @@ -1923,14 +2144,6 @@ class ObjectOutputStream final : public io::OutputStream { state->completed_parts[slot] = std::move(part); } - static Status UploadPartError(const S3Model::UploadPartRequest& req, - const S3Model::UploadPartOutcome& outcome) { - return ErrorToStatus( - std::forward_as_tuple("When uploading part for key '", req.GetKey(), - "' in bucket '", req.GetBucket(), "': "), - "UploadPart", outcome.GetError()); - } - protected: std::shared_ptr holder_; const io::IOContext io_context_; @@ -1938,8 +2151,9 @@ class ObjectOutputStream final : public io::OutputStream { const std::shared_ptr metadata_; const std::shared_ptr default_metadata_; const bool background_writes_; + const bool allow_delayed_open_; - Aws::String upload_id_; + Aws::String multipart_upload_id_; bool closed_ = true; int64_t pos_ = 0; int32_t part_number_ = 1; @@ -1950,10 +2164,11 @@ class ObjectOutputStream final : public io::OutputStream { // in the completion handler. struct UploadState { std::mutex mutex; + // Only populated for multi-part uploads. 
    Aws::Vector<S3Model::CompletedPart> completed_parts;
-    int64_t parts_in_progress = 0;
+    int64_t uploads_in_progress = 0;
     Status status;
-    Future<> pending_parts_completed = Future<>::MakeFinished(Status::OK());
+    Future<> pending_uploads_completed = Future<>::MakeFinished(Status::OK());
   };
   std::shared_ptr<UploadState> upload_state_;
 };

diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index fbbe9d0b3f42b..85d5ff8fed553 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -177,6 +177,16 @@ struct ARROW_EXPORT S3Options {
   /// to be true to address these scenarios.
   bool check_directory_existence_before_creation = false;

+  /// Whether to allow file-open methods to return before the actual open.
+  ///
+  /// Enabling this may reduce the latency of `OpenInputStream`, `OpenOutputStream`,
+  /// and similar methods, by reducing the number of roundtrips necessary. It may also
+  /// allow usage of more efficient S3 APIs for small files.
+  /// The downside is that failure conditions such as attempting to open a file in a
+  /// non-existing bucket will only be reported when actual I/O is done (at worst,
+  /// when attempting to close the file).
+  bool allow_delayed_open = false;
+
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 5a160a78ceea0..c33fa4f5aac97 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -45,7 +45,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include

@@ -450,25 +452,8 @@ class TestS3FS : public S3TestMixin {
       req.SetBucket(ToAwsString("empty-bucket"));
       ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
-    {
-      Aws::S3::Model::PutObjectRequest req;
-      req.SetBucket(ToAwsString("bucket"));
-      req.SetKey(ToAwsString("emptydir/"));
-      req.SetBody(std::make_shared<std::stringstream>(""));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      // NOTE: no need to create intermediate "directories" somedir/ and
-      // somedir/subdir/
-      req.SetKey(ToAwsString("somedir/subdir/subfile"));
-      req.SetBody(std::make_shared<std::stringstream>("sub data"));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      req.SetKey(ToAwsString("somefile"));
-      req.SetBody(std::make_shared<std::stringstream>("some data"));
-      req.SetContentType("x-arrow/test");
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-      req.SetKey(ToAwsString("otherdir/1/2/3/otherfile"));
-      req.SetBody(std::make_shared<std::stringstream>("other data"));
-      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
-    }
+
+    ASSERT_OK(PopulateTestBucket());
   }

   void TearDown() override {
@@ -478,6 +463,72 @@ class TestS3FS : public S3TestMixin {
     S3TestMixin::TearDown();
   }

+  Status PopulateTestBucket() {
+    Aws::S3::Model::PutObjectRequest req;
+    req.SetBucket(ToAwsString("bucket"));
+    req.SetKey(ToAwsString("emptydir/"));
+    req.SetBody(std::make_shared<std::stringstream>(""));
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+    // NOTE: no need to create intermediate "directories" somedir/ and
+    // somedir/subdir/
+    req.SetKey(ToAwsString("somedir/subdir/subfile"));
+    req.SetBody(std::make_shared<std::stringstream>("sub data"));
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+    req.SetKey(ToAwsString("somefile"));
+    req.SetBody(std::make_shared<std::stringstream>("some data"));
+    req.SetContentType("x-arrow/test");
+    RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
+
req.SetKey(ToAwsString("otherdir/1/2/3/otherfile")); + req.SetBody(std::make_shared("other data")); + RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req))); + + return Status::OK(); + } + + Status RestoreTestBucket() { + // First empty the test bucket, and then re-upload initial test files. + + Aws::S3::Model::Delete delete_object; + { + // Mostly taken from + // https://github.com/awsdocs/aws-doc-sdk-examples/blob/main/cpp/example_code/s3/list_objects.cpp + Aws::S3::Model::ListObjectsV2Request req; + req.SetBucket(Aws::String{"bucket"}); + + Aws::String continuation_token; + do { + if (!continuation_token.empty()) { + req.SetContinuationToken(continuation_token); + } + + auto outcome = client_->ListObjectsV2(req); + + if (!outcome.IsSuccess()) { + return OutcomeToStatus("ListObjectsV2", outcome); + } else { + Aws::Vector objects = outcome.GetResult().GetContents(); + for (const auto& object : objects) { + delete_object.AddObjects( + Aws::S3::Model::ObjectIdentifier().WithKey(object.GetKey())); + } + + continuation_token = outcome.GetResult().GetNextContinuationToken(); + } + } while (!continuation_token.empty()); + } + + { + Aws::S3::Model::DeleteObjectsRequest req; + + req.SetDelete(std::move(delete_object)); + req.SetBucket(Aws::String{"bucket"}); + + RETURN_NOT_OK(OutcomeToStatus("DeleteObjects", client_->DeleteObjects(req))); + } + + return PopulateTestBucket(); + } + Result> MakeNewFileSystem( io::IOContext io_context = io::default_io_context()) { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); @@ -518,11 +569,13 @@ class TestS3FS : public S3TestMixin { AssertFileInfo(infos[11], "empty-bucket", FileType::Directory); } - void TestOpenOutputStream() { + void TestOpenOutputStream(bool allow_delayed_open) { std::shared_ptr stream; - // Nonexistent - ASSERT_RAISES(IOError, fs_->OpenOutputStream("nonexistent-bucket/somefile")); + if (!allow_delayed_open) { + // Nonexistent + ASSERT_RAISES(IOError, fs_->OpenOutputStream("nonexistent-bucket/somefile")); + } // URI ASSERT_RAISES(Invalid, fs_->OpenOutputStream("s3:bucket/newfile1")); @@ -843,8 +896,8 @@ TEST_F(TestS3FS, GetFileInfoGenerator) { TEST_F(TestS3FS, GetFileInfoGeneratorStress) { // This test is slow because it needs to create a bunch of seed files. However, it is - // the only test that stresses listing and deleting when there are more than 1000 files - // and paging is required. + // the only test that stresses listing and deleting when there are more than 1000 + // files and paging is required. 
constexpr int32_t kNumDirs = 4; constexpr int32_t kNumFilesPerDir = 512; FileInfoVector expected_infos; @@ -1235,50 +1288,83 @@ TEST_F(TestS3FS, OpenInputFile) { ASSERT_RAISES(IOError, file->Seek(10)); } -TEST_F(TestS3FS, OpenOutputStreamBackgroundWrites) { TestOpenOutputStream(); } +struct S3OptionsTestParameters { + bool background_writes{false}; + bool allow_delayed_open{false}; -TEST_F(TestS3FS, OpenOutputStreamSyncWrites) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStream(); -} + void ApplyToS3Options(S3Options* options) const { + options->background_writes = background_writes; + options->allow_delayed_open = allow_delayed_open; + } -TEST_F(TestS3FS, OpenOutputStreamAbortBackgroundWrites) { TestOpenOutputStreamAbort(); } + static std::vector GetCartesianProduct() { + return { + S3OptionsTestParameters{true, false}, + S3OptionsTestParameters{false, false}, + S3OptionsTestParameters{true, true}, + S3OptionsTestParameters{false, true}, + }; + } -TEST_F(TestS3FS, OpenOutputStreamAbortSyncWrites) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStreamAbort(); -} + std::string ToString() const { + return std::string("background_writes = ") + (background_writes ? "true" : "false") + + ", allow_delayed_open = " + (allow_delayed_open ? "true" : "false"); + } +}; + +TEST_F(TestS3FS, OpenOutputStream) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); -TEST_F(TestS3FS, OpenOutputStreamDestructorBackgroundWrites) { - TestOpenOutputStreamDestructor(); + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStream(combination.allow_delayed_open); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamDestructorSyncWrite) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStreamDestructor(); +TEST_F(TestS3FS, OpenOutputStreamAbort) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamAbort(); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamAsyncDestructorBackgroundWrites) { - TestOpenOutputStreamCloseAsyncDestructor(); +TEST_F(TestS3FS, OpenOutputStreamDestructor) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamDestructor(); + ASSERT_OK(RestoreTestBucket()); + } } -TEST_F(TestS3FS, OpenOutputStreamAsyncDestructorSyncWrite) { - options_.background_writes = false; - MakeFileSystem(); - TestOpenOutputStreamCloseAsyncDestructor(); +TEST_F(TestS3FS, OpenOutputStreamAsync) { + for (const auto& combination : S3OptionsTestParameters::GetCartesianProduct()) { + ARROW_SCOPED_TRACE(combination.ToString()); + + combination.ApplyToS3Options(&options_); + MakeFileSystem(); + TestOpenOutputStreamCloseAsyncDestructor(); + } } TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockBackgroundWrites) { TestOpenOutputStreamCloseAsyncFutureDeadlock(); + ASSERT_OK(RestoreTestBucket()); } TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockSyncWrite) { options_.background_writes = false; MakeFileSystem(); TestOpenOutputStreamCloseAsyncFutureDeadlock(); + ASSERT_OK(RestoreTestBucket()); } TEST_F(TestS3FS, OpenOutputStreamMetadata) { @@ -1396,8 +1482,8 @@ 
TEST_F(TestS3FS, CustomRetryStrategy) { auto retry_strategy = std::make_shared(); options_.retry_strategy = retry_strategy; MakeFileSystem(); - // Attempt to open file that doesn't exist. Should hit TestRetryStrategy::ShouldRetry() - // 3 times before bubbling back up here. + // Attempt to open file that doesn't exist. Should hit + // TestRetryStrategy::ShouldRetry() 3 times before bubbling back up here. ASSERT_RAISES(IOError, fs_->OpenInputStream("nonexistent-bucket/somefile")); ASSERT_EQ(retry_strategy->GetErrorsEncountered().size(), 3); for (const auto& error : retry_strategy->GetErrorsEncountered()) { From 3483ac6bc289e44cd07dc5440270dc9a2fe6af0c Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 22 Jul 2024 10:40:03 -0300 Subject: [PATCH 105/122] GH-43329: [C++] Order classes in flight/types.h according to Flight.proto (#43330) ### Rationale for this change Identify the differences between Flight.proto and the hand-written types.h,.cc files. ### What changes are included in this PR? - Re-ordering of the classes ### Are these changes tested? By existing tests. * GitHub Issue: #43329 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/flight/types.cc | 362 ++++++++-------- cpp/src/arrow/flight/types.h | 757 +++++++++++++++++----------------- format/Flight.proto | 124 +++--- 3 files changed, 636 insertions(+), 607 deletions(-) diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index dc1173ebe18db..bb5932a312567 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -151,18 +151,57 @@ Status MakeFlightError(FlightStatusCode code, std::string message, std::make_shared(code, std::move(extra_info))); } -bool FlightDescriptor::Equals(const FlightDescriptor& other) const { - if (type != other.type) { - return false; +static std::ostream& operator<<(std::ostream& os, std::vector values) { + os << '['; + std::string sep = ""; + for (const auto& v : values) { + os << sep << std::quoted(v); + sep = ", "; } - switch (type) { - case PATH: - return path == other.path; - case CMD: - return cmd == other.cmd; - default: - return false; + os << ']'; + + return os; +} + +template +static std::ostream& operator<<(std::ostream& os, std::map m) { + os << '{'; + std::string sep = ""; + if constexpr (std::is_convertible_v) { + // std::string, char*, std::string_view + for (const auto& [k, v] : m) { + os << sep << '[' << k << "]: " << std::quoted(v) << '"'; + sep = ", "; + } + } else { + for (const auto& [k, v] : m) { + os << sep << '[' << k << "]: " << v; + sep = ", "; + } } + os << '}'; + + return os; +} + +//------------------------------------------------------------ +// Wrapper types for Flight RPC protobuf messages + +std::string BasicAuth::ToString() const { + return arrow::util::StringBuilder(""); +} + +bool BasicAuth::Equals(const BasicAuth& other) const { + return (username == other.username) && (password == other.password); +} + +arrow::Status BasicAuth::Deserialize(std::string_view serialized, BasicAuth* out) { + return DeserializeProtoString("BasicAuth", serialized, out); +} + +arrow::Status BasicAuth::SerializeToString(std::string* out) const { + return SerializeToProtoString("BasicAuth", *this, out); } std::string FlightDescriptor::ToString() const { @@ -192,50 +231,18 @@ std::string FlightDescriptor::ToString() const { return ss.str(); } -Status FlightPayload::Validate() const { - static constexpr int64_t kInt32Max = std::numeric_limits::max(); - if (descriptor && descriptor->size() > 
kInt32Max) { - return Status::CapacityError("Descriptor size overflow (>= 2**31)"); - } - if (app_metadata && app_metadata->size() > kInt32Max) { - return Status::CapacityError("app_metadata size overflow (>= 2**31)"); +bool FlightDescriptor::Equals(const FlightDescriptor& other) const { + if (type != other.type) { + return false; } - if (ipc_message.body_length > kInt32Max) { - return Status::Invalid("Cannot send record batches exceeding 2GiB yet"); + switch (type) { + case PATH: + return path == other.path; + case CMD: + return cmd == other.cmd; + default: + return false; } - return Status::OK(); -} - -arrow::Result> SchemaResult::GetSchema( - ipc::DictionaryMemo* dictionary_memo) const { - // Create a non-owned Buffer to avoid copying - io::BufferReader schema_reader(std::make_shared(raw_schema_)); - return ipc::ReadSchema(&schema_reader, dictionary_memo); -} - -arrow::Result> SchemaResult::Make(const Schema& schema) { - std::string schema_in; - RETURN_NOT_OK(internal::SchemaToString(schema, &schema_in)); - return std::make_unique(std::move(schema_in)); -} - -std::string SchemaResult::ToString() const { - return ""; -} - -bool SchemaResult::Equals(const SchemaResult& other) const { - return raw_schema_ == other.raw_schema_; -} - -arrow::Status SchemaResult::SerializeToString(std::string* out) const { - return SerializeToProtoString("SchemaResult", *this, out); -} - -arrow::Status SchemaResult::Deserialize(std::string_view serialized, SchemaResult* out) { - pb::SchemaResult pb_schema_result; - RETURN_NOT_OK(ParseFromString("SchemaResult", serialized, &pb_schema_result)); - *out = SchemaResult{pb_schema_result.schema()}; - return Status::OK(); } arrow::Status FlightDescriptor::SerializeToString(std::string* out) const { @@ -248,22 +255,6 @@ arrow::Status FlightDescriptor::Deserialize(std::string_view serialized, "FlightDescriptor", serialized, out); } -std::string Ticket::ToString() const { - std::stringstream ss; - ss << ""; - return ss.str(); -} - -bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; } - -arrow::Status Ticket::SerializeToString(std::string* out) const { - return SerializeToProtoString("Ticket", *this, out); -} - -arrow::Status Ticket::Deserialize(std::string_view serialized, Ticket* out) { - return DeserializeProtoString("Ticket", serialized, out); -} - arrow::Result FlightInfo::Make(const Schema& schema, const FlightDescriptor& descriptor, const std::vector& endpoints, @@ -431,43 +422,49 @@ arrow::Status CancelFlightInfoRequest::Deserialize(std::string_view serialized, "CancelFlightInfoRequest", serialized, out); } -static const char* const SetSessionOptionStatusNames[] = {"Unspecified", "InvalidName", - "InvalidValue", "Error"}; -static const char* const CloseSessionStatusNames[] = {"Unspecified", "Closed", "Closing", - "NotClosable"}; - -// Helpers for stringifying maps containing various types -std::string ToString(const SetSessionOptionErrorValue& error_value) { - return SetSessionOptionStatusNames[static_cast(error_value)]; +std::string CancelFlightInfoResult::ToString() const { + std::stringstream ss; + ss << ""; + return ss.str(); } -std::ostream& operator<<(std::ostream& os, - const SetSessionOptionErrorValue& error_value) { - os << ToString(error_value); - return os; +bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const { + return status == other.status; } -std::string ToString(const CloseSessionStatus& status) { - return CloseSessionStatusNames[static_cast(status)]; +arrow::Status 
CancelFlightInfoResult::SerializeToString(std::string* out) const { + return SerializeToProtoString("CancelFlightInfoResult", + *this, out); } -std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status) { - os << ToString(status); - return os; +arrow::Status CancelFlightInfoResult::Deserialize(std::string_view serialized, + CancelFlightInfoResult* out) { + return DeserializeProtoString( + "CancelFlightInfoResult", serialized, out); } -std::ostream& operator<<(std::ostream& os, std::vector values) { - os << '['; - std::string sep = ""; - for (const auto& v : values) { - os << sep << std::quoted(v); - sep = ", "; +std::ostream& operator<<(std::ostream& os, CancelStatus status) { + switch (status) { + case CancelStatus::kUnspecified: + os << "Unspecified"; + break; + case CancelStatus::kCancelled: + os << "Cancelled"; + break; + case CancelStatus::kCancelling: + os << "Cancelling"; + break; + case CancelStatus::kNotCancellable: + os << "NotCancellable"; + break; } - os << ']'; - return os; } +// Session management messages + +// SessionOptionValue + std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v) { if (std::holds_alternative(v)) { os << ""; @@ -486,33 +483,6 @@ std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v) { return os; } -std::ostream& operator<<(std::ostream& os, const SetSessionOptionsResult::Error& e) { - os << '{' << e.value << '}'; - return os; -} - -template -std::ostream& operator<<(std::ostream& os, std::map m) { - os << '{'; - std::string sep = ""; - if constexpr (std::is_convertible_v) { - // std::string, char*, std::string_view - for (const auto& [k, v] : m) { - os << sep << '[' << k << "]: " << std::quoted(v) << '"'; - sep = ", "; - } - } else { - for (const auto& [k, v] : m) { - os << sep << '[' << k << "]: " << v; - sep = ", "; - } - } - os << '}'; - - return os; -} - -namespace { static bool CompareSessionOptionMaps(const std::map& a, const std::map& b) { if (a.size() != b.size()) { @@ -533,15 +503,30 @@ static bool CompareSessionOptionMaps(const std::map(error_value)]; +} + +std::ostream& operator<<(std::ostream& os, + const SetSessionOptionErrorValue& error_value) { + os << ToString(error_value); + return os; +} // SetSessionOptionsRequest std::string SetSessionOptionsRequest::ToString() const { std::stringstream ss; - ss << "(status)]; +} + +std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status) { + os << ToString(status); + return os; +} + // CloseSessionResult std::string CloseSessionResult::ToString() const { std::stringstream ss; - ss << ""; + return ss.str(); +} + +bool Ticket::Equals(const Ticket& other) const { return ticket == other.ticket; } + +arrow::Status Ticket::SerializeToString(std::string* out) const { + return SerializeToProtoString("Ticket", *this, out); +} + +arrow::Status Ticket::Deserialize(std::string_view serialized, Ticket* out) { + return DeserializeProtoString("Ticket", serialized, out); +} + Location::Location() { uri_ = std::make_shared(); } arrow::Result Location::Parse(const std::string& uri_string) { @@ -712,7 +733,6 @@ arrow::Result Location::ForScheme(const std::string& scheme, return Location::Parse(uri_string.str()); } -std::string Location::ToString() const { return uri_->ToString(); } std::string Location::scheme() const { std::string scheme = uri_->scheme(); if (scheme.empty()) { @@ -722,6 +742,8 @@ std::string Location::scheme() const { return scheme; } +std::string Location::ToString() const { return uri_->ToString(); } + bool 
Location::Equals(const Location& other) const { return ToString() == other.ToString(); } @@ -815,6 +837,20 @@ arrow::Status RenewFlightEndpointRequest::Deserialize(std::string_view serialize serialized, out); } +Status FlightPayload::Validate() const { + static constexpr int64_t kInt32Max = std::numeric_limits::max(); + if (descriptor && descriptor->size() > kInt32Max) { + return Status::CapacityError("Descriptor size overflow (>= 2**31)"); + } + if (app_metadata && app_metadata->size() > kInt32Max) { + return Status::CapacityError("app_metadata size overflow (>= 2**31)"); + } + if (ipc_message.body_length > kInt32Max) { + return Status::Invalid("Cannot send record batches exceeding 2GiB yet"); + } + return Status::OK(); +} + std::string ActionType::ToString() const { return arrow::util::StringBuilder(""); @@ -924,45 +960,40 @@ arrow::Status Result::Deserialize(std::string_view serialized, Result* out) { return DeserializeProtoString("Result", serialized, out); } -std::string CancelFlightInfoResult::ToString() const { - std::stringstream ss; - ss << ""; - return ss.str(); +arrow::Result> SchemaResult::GetSchema( + ipc::DictionaryMemo* dictionary_memo) const { + // Create a non-owned Buffer to avoid copying + io::BufferReader schema_reader(std::make_shared(raw_schema_)); + return ipc::ReadSchema(&schema_reader, dictionary_memo); } -bool CancelFlightInfoResult::Equals(const CancelFlightInfoResult& other) const { - return status == other.status; +arrow::Result> SchemaResult::Make(const Schema& schema) { + std::string schema_in; + RETURN_NOT_OK(internal::SchemaToString(schema, &schema_in)); + return std::make_unique(std::move(schema_in)); } -arrow::Status CancelFlightInfoResult::SerializeToString(std::string* out) const { - return SerializeToProtoString("CancelFlightInfoResult", - *this, out); +std::string SchemaResult::ToString() const { + return ""; } -arrow::Status CancelFlightInfoResult::Deserialize(std::string_view serialized, - CancelFlightInfoResult* out) { - return DeserializeProtoString( - "CancelFlightInfoResult", serialized, out); +bool SchemaResult::Equals(const SchemaResult& other) const { + return raw_schema_ == other.raw_schema_; } -std::ostream& operator<<(std::ostream& os, CancelStatus status) { - switch (status) { - case CancelStatus::kUnspecified: - os << "Unspecified"; - break; - case CancelStatus::kCancelled: - os << "Cancelled"; - break; - case CancelStatus::kCancelling: - os << "Cancelling"; - break; - case CancelStatus::kNotCancellable: - os << "NotCancellable"; - break; - } - return os; +arrow::Status SchemaResult::SerializeToString(std::string* out) const { + return SerializeToProtoString("SchemaResult", *this, out); } +arrow::Status SchemaResult::Deserialize(std::string_view serialized, SchemaResult* out) { + pb::SchemaResult pb_schema_result; + RETURN_NOT_OK(ParseFromString("SchemaResult", serialized, &pb_schema_result)); + *out = SchemaResult{pb_schema_result.schema()}; + return Status::OK(); +} + +//------------------------------------------------------------ + Status ResultStream::Drain() { while (true) { ARROW_ASSIGN_OR_RAISE(auto result, Next()); @@ -1050,23 +1081,6 @@ arrow::Result> SimpleResultStream::Next() { return std::make_unique(std::move(results_[position_++])); } -std::string BasicAuth::ToString() const { - return arrow::util::StringBuilder(""); -} - -bool BasicAuth::Equals(const BasicAuth& other) const { - return (username == other.username) && (password == other.password); -} - -arrow::Status BasicAuth::Deserialize(std::string_view serialized, 
BasicAuth* out) { - return DeserializeProtoString("BasicAuth", serialized, out); -} - -arrow::Status BasicAuth::SerializeToString(std::string* out) const { - return SerializeToProtoString("BasicAuth", *this, out); -} - //------------------------------------------------------------ // Error propagation helpers diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index bc8e234d977b1..de93750f75b25 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -61,6 +61,18 @@ class Uri; namespace flight { +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpc; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcTcp; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcUnix; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcTls; + +class FlightClient; +class FlightServerBase; + /// \brief A timestamp compatible with Protocol Buffer's /// google.protobuf.Timestamp: /// @@ -214,6 +226,40 @@ struct BaseType { } // namespace internal +//------------------------------------------------------------ +// Wrapper types for Flight RPC protobuf messages + +// A wrapper around arrow.flight.protocol.HandshakeRequest is not defined +// A wrapper around arrow.flight.protocol.HandshakeResponse is not defined + +/// \brief message for simple auth +struct ARROW_FLIGHT_EXPORT BasicAuth : public internal::BaseType { + std::string username; + std::string password; + + BasicAuth() = default; + BasicAuth(std::string username, std::string password) + : username(std::move(username)), password(std::move(password)) {} + + std::string ToString() const; + bool Equals(const BasicAuth& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, BasicAuth* out); +}; + +// A wrapper around arrow.flight.protocol.Empty is not defined + /// \brief A type of action that can be performed with the DoAction RPC. struct ARROW_FLIGHT_EXPORT ActionType : public internal::BaseType { /// \brief The name of the action. @@ -330,63 +376,26 @@ struct ARROW_FLIGHT_EXPORT Result : public internal::BaseType { static arrow::Status Deserialize(std::string_view serialized, Result* out); }; -enum class CancelStatus { - /// The cancellation status is unknown. Servers should avoid using - /// this value (send a kNotCancellable if the requested FlightInfo - /// is not known). Clients can retry the request. - kUnspecified = 0, - /// The cancellation request is complete. Subsequent requests with - /// the same payload may return kCancelled or a kNotCancellable error. - kCancelled = 1, - /// The cancellation request is in progress. The client may retry - /// the cancellation request. - kCancelling = 2, - // The FlightInfo is not cancellable. The client should not retry the - // cancellation request. - kNotCancellable = 3, -}; - -/// \brief The result of the CancelFlightInfo action. 
-struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult - : public internal::BaseType { - CancelStatus status = CancelStatus::kUnspecified; - - CancelFlightInfoResult() = default; - CancelFlightInfoResult(CancelStatus status) // NOLINT runtime/explicit - : status(status) {} - - std::string ToString() const; - bool Equals(const CancelFlightInfoResult& other) const; - - using SuperT::Deserialize; - using SuperT::SerializeToString; - - /// \brief Serialize this message to its wire-format representation. - /// - /// Use `SerializeToString()` if you want a Result-returning version. - arrow::Status SerializeToString(std::string* out) const; - - /// \brief Deserialize this message from its wire-format representation. - /// - /// Use `Deserialize(serialized)` if you want a Result-returning version. - static arrow::Status Deserialize(std::string_view serialized, - CancelFlightInfoResult* out); -}; +/// \brief Schema result returned after a schema request RPC +struct ARROW_FLIGHT_EXPORT SchemaResult : public internal::BaseType { + public: + SchemaResult() = default; + explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {} -ARROW_FLIGHT_EXPORT -std::ostream& operator<<(std::ostream& os, CancelStatus status); + /// \brief Factory method to construct a SchemaResult. + static arrow::Result> Make(const Schema& schema); -/// \brief message for simple auth -struct ARROW_FLIGHT_EXPORT BasicAuth : public internal::BaseType { - std::string username; - std::string password; + /// \brief return schema + /// \param[in,out] dictionary_memo for dictionary bookkeeping, will + /// be modified + /// \return Arrow result with the reconstructed Schema + arrow::Result> GetSchema( + ipc::DictionaryMemo* dictionary_memo) const; - BasicAuth() = default; - BasicAuth(std::string username, std::string password) - : username(std::move(username)), password(std::move(password)) {} + const std::string& serialized_schema() const { return raw_schema_; } std::string ToString() const; - bool Equals(const BasicAuth& other) const; + bool Equals(const SchemaResult& other) const; using SuperT::Deserialize; using SuperT::SerializeToString; @@ -399,7 +408,10 @@ struct ARROW_FLIGHT_EXPORT BasicAuth : public internal::BaseType { /// \brief Deserialize this message from its wire-format representation. /// /// Use `Deserialize(serialized)` if you want a Result-returning version. - static arrow::Status Deserialize(std::string_view serialized, BasicAuth* out); + static arrow::Status Deserialize(std::string_view serialized, SchemaResult* out); + + private: + std::string raw_schema_; }; /// \brief A request to retrieve or generate a dataset @@ -427,10 +439,9 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor FlightDescriptor(DescriptorType type, std::string cmd, std::vector path) : type(type), cmd(std::move(cmd)), path(std::move(path)) {} - bool Equals(const FlightDescriptor& other) const; - /// \brief Get a human-readable form of this descriptor. 
std::string ToString() const; + bool Equals(const FlightDescriptor& other) const; using SuperT::Deserialize; using SuperT::SerializeToString; @@ -462,17 +473,60 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor } }; -/// \brief Data structure providing an opaque identifier or credential to use -/// when requesting a data stream with the DoGet RPC -struct ARROW_FLIGHT_EXPORT Ticket : public internal::BaseType { - std::string ticket; +/// \brief The access coordinates for retrieval of a dataset, returned by +/// GetFlightInfo +class ARROW_FLIGHT_EXPORT FlightInfo + : public internal::BaseType> { + public: + struct Data { + std::string schema; + FlightDescriptor descriptor; + std::vector endpoints; + int64_t total_records = -1; + int64_t total_bytes = -1; + bool ordered = false; + std::string app_metadata; + }; - Ticket() = default; - Ticket(std::string ticket) // NOLINT runtime/explicit - : ticket(std::move(ticket)) {} + explicit FlightInfo(Data data) : data_(std::move(data)), reconstructed_schema_(false) {} - std::string ToString() const; - bool Equals(const Ticket& other) const; + /// \brief Factory method to construct a FlightInfo. + static arrow::Result Make(const Schema& schema, + const FlightDescriptor& descriptor, + const std::vector& endpoints, + int64_t total_records, int64_t total_bytes, + bool ordered = false, + std::string app_metadata = ""); + + /// \brief Deserialize the Arrow schema of the dataset. Populate any + /// dictionary encoded fields into a DictionaryMemo for + /// bookkeeping + /// \param[in,out] dictionary_memo for dictionary bookkeeping, will + /// be modified + /// \return Arrow result with the reconstructed Schema + arrow::Result> GetSchema( + ipc::DictionaryMemo* dictionary_memo) const; + + const std::string& serialized_schema() const { return data_.schema; } + + /// The descriptor associated with this flight, may not be set + const FlightDescriptor& descriptor() const { return data_.descriptor; } + + /// A list of endpoints associated with the flight (dataset). To consume the + /// whole flight, all endpoints must be consumed + const std::vector& endpoints() const { return data_.endpoints; } + + /// The total number of records (rows) in the dataset. If unknown, set to -1 + int64_t total_records() const { return data_.total_records; } + + /// The total number of bytes in the dataset. If unknown, set to -1 + int64_t total_bytes() const { return data_.total_bytes; } + + /// Whether endpoints are in the same order as the data. + bool ordered() const { return data_.ordered; } + + /// Application-defined opaque metadata + const std::string& app_metadata() const { return data_.app_metadata; } using SuperT::Deserialize; using SuperT::SerializeToString; @@ -491,53 +545,233 @@ struct ARROW_FLIGHT_EXPORT Ticket : public internal::BaseType { /// services) that may want to return Flight types. /// /// Use `Deserialize(serialized)` if you want a Result-returning version. 
- static arrow::Status Deserialize(std::string_view serialized, Ticket* out); -}; - -class FlightClient; -class FlightServerBase; + static arrow::Status Deserialize(std::string_view serialized, + std::unique_ptr* out); -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpc; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcTcp; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcUnix; -ARROW_FLIGHT_EXPORT -extern const char* kSchemeGrpcTls; + std::string ToString() const; -/// \brief A host location (a URI) -struct ARROW_FLIGHT_EXPORT Location : public internal::BaseType { - public: - /// \brief Initialize a blank location. - Location(); + /// Compare two FlightInfo for equality. This will compare the + /// serialized schema representations, NOT the logical equality of + /// the schemas. + bool Equals(const FlightInfo& other) const; - /// \brief Initialize a location by parsing a URI string - static arrow::Result Parse(const std::string& uri_string); + private: + Data data_; + mutable std::shared_ptr schema_; + mutable bool reconstructed_schema_; +}; - /// \brief Get the fallback URI. - /// - /// arrow-flight-reuse-connection://? means that a client may attempt to - /// reuse an existing connection to a Flight service to fetch data instead - /// of creating a new connection to one of the other locations listed in a - /// FlightEndpoint response. - static const Location& ReuseConnection(); +/// \brief The information to process a long-running query. +class ARROW_FLIGHT_EXPORT PollInfo + : public internal::BaseType> { + public: + /// The currently available results so far. + std::unique_ptr info = NULLPTR; + /// The descriptor the client should use on the next try. If unset, + /// the query is complete. + std::optional descriptor = std::nullopt; + /// Query progress. Must be in [0.0, 1.0] but need not be + /// monotonic or nondecreasing. If unknown, do not set. + std::optional progress = std::nullopt; + /// Expiration time for this request. After this passes, the server + /// might not accept the poll descriptor anymore (and the query may + /// be cancelled). This may be updated on a call to PollFlightInfo. + std::optional expiration_time = std::nullopt; - /// \brief Initialize a location for a non-TLS, gRPC-based Flight - /// service from a host and port - /// \param[in] host The hostname to connect to - /// \param[in] port The port - /// \return Arrow result with the resulting location - static arrow::Result ForGrpcTcp(const std::string& host, const int port); + PollInfo() + : info(NULLPTR), + descriptor(std::nullopt), + progress(std::nullopt), + expiration_time(std::nullopt) {} - /// \brief Initialize a location for a TLS-enabled, gRPC-based Flight - /// service from a host and port - /// \param[in] host The hostname to connect to - /// \param[in] port The port - /// \return Arrow result with the resulting location - static arrow::Result ForGrpcTls(const std::string& host, const int port); + PollInfo(std::unique_ptr info, std::optional descriptor, + std::optional progress, std::optional expiration_time) + : info(std::move(info)), + descriptor(std::move(descriptor)), + progress(progress), + expiration_time(expiration_time) {} - /// \brief Initialize a location for a domain socket-based Flight + PollInfo(const PollInfo& other) + : info(other.info ? 
std::make_unique(*other.info) : NULLPTR), + descriptor(other.descriptor), + progress(other.progress), + expiration_time(other.expiration_time) {} + PollInfo(PollInfo&& other) noexcept = default; + ~PollInfo() = default; + PollInfo& operator=(const PollInfo& other) { + info = other.info ? std::make_unique(*other.info) : NULLPTR; + descriptor = other.descriptor; + progress = other.progress; + expiration_time = other.expiration_time; + return *this; + } + PollInfo& operator=(PollInfo&& other) = default; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Get the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Parse the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + std::unique_ptr* out); + + std::string ToString() const; + + /// Compare two PollInfo for equality. This will compare the + /// serialized schema representations, NOT the logical equality of + /// the schemas. + bool Equals(const PollInfo& other) const; +}; + +/// \brief The request of the CancelFlightInfoRequest action. +struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest + : public internal::BaseType { + std::unique_ptr info; + + CancelFlightInfoRequest() = default; + CancelFlightInfoRequest(std::unique_ptr info) // NOLINT runtime/explicit + : info(std::move(info)) {} + + std::string ToString() const; + bool Equals(const CancelFlightInfoRequest& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + CancelFlightInfoRequest* out); +}; + +enum class CancelStatus { + /// The cancellation status is unknown. Servers should avoid using + /// this value (send a kNotCancellable if the requested FlightInfo + /// is not known). Clients can retry the request. + kUnspecified = 0, + /// The cancellation request is complete. Subsequent requests with + /// the same payload may return kCancelled or a kNotCancellable error. + kCancelled = 1, + /// The cancellation request is in progress. The client may retry + /// the cancellation request. + kCancelling = 2, + // The FlightInfo is not cancellable. The client should not retry the + // cancellation request. + kNotCancellable = 3, +}; + +/// \brief The result of the CancelFlightInfo action. 
+struct ARROW_FLIGHT_EXPORT CancelFlightInfoResult + : public internal::BaseType { + CancelStatus status = CancelStatus::kUnspecified; + + CancelFlightInfoResult() = default; + CancelFlightInfoResult(CancelStatus status) // NOLINT runtime/explicit + : status(status) {} + + std::string ToString() const; + bool Equals(const CancelFlightInfoResult& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Serialize this message to its wire-format representation. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Deserialize this message from its wire-format representation. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, + CancelFlightInfoResult* out); +}; + +ARROW_FLIGHT_EXPORT +std::ostream& operator<<(std::ostream& os, CancelStatus status); + +/// \brief Data structure providing an opaque identifier or credential to use +/// when requesting a data stream with the DoGet RPC +struct ARROW_FLIGHT_EXPORT Ticket : public internal::BaseType { + std::string ticket; + + Ticket() = default; + Ticket(std::string ticket) // NOLINT runtime/explicit + : ticket(std::move(ticket)) {} + + std::string ToString() const; + bool Equals(const Ticket& other) const; + + using SuperT::Deserialize; + using SuperT::SerializeToString; + + /// \brief Get the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `SerializeToString()` if you want a Result-returning version. + arrow::Status SerializeToString(std::string* out) const; + + /// \brief Parse the wire-format representation of this type. + /// + /// Useful when interoperating with non-Flight systems (e.g. REST + /// services) that may want to return Flight types. + /// + /// Use `Deserialize(serialized)` if you want a Result-returning version. + static arrow::Status Deserialize(std::string_view serialized, Ticket* out); +}; + +/// \brief A host location (a URI) +struct ARROW_FLIGHT_EXPORT Location : public internal::BaseType { + public: + /// \brief Initialize a blank location. + Location(); + + /// \brief Initialize a location by parsing a URI string + static arrow::Result Parse(const std::string& uri_string); + + /// \brief Get the fallback URI. + /// + /// arrow-flight-reuse-connection://? means that a client may attempt to + /// reuse an existing connection to a Flight service to fetch data instead + /// of creating a new connection to one of the other locations listed in a + /// FlightEndpoint response. 
+ static const Location& ReuseConnection(); + + /// \brief Initialize a location for a non-TLS, gRPC-based Flight + /// service from a host and port + /// \param[in] host The hostname to connect to + /// \param[in] port The port + /// \return Arrow result with the resulting location + static arrow::Result ForGrpcTcp(const std::string& host, const int port); + + /// \brief Initialize a location for a TLS-enabled, gRPC-based Flight + /// service from a host and port + /// \param[in] host The hostname to connect to + /// \param[in] port The port + /// \return Arrow result with the resulting location + static arrow::Result ForGrpcTls(const std::string& host, const int port); + + /// \brief Initialize a location for a domain socket-based Flight /// service /// \param[in] path The path to the domain socket /// \return Arrow result with the resulting location @@ -547,12 +781,11 @@ struct ARROW_FLIGHT_EXPORT Location : public internal::BaseType { static arrow::Result ForScheme(const std::string& scheme, const std::string& host, const int port); - /// \brief Get a representation of this URI as a string. - std::string ToString() const; - /// \brief Get the scheme of this URI. std::string scheme() const; + /// \brief Get a representation of this URI as a string. + std::string ToString() const; bool Equals(const Location& other) const; using SuperT::Deserialize; @@ -645,6 +878,8 @@ struct ARROW_FLIGHT_EXPORT RenewFlightEndpointRequest RenewFlightEndpointRequest* out); }; +// FlightData in Flight.proto maps to FlightPayload here. + /// \brief Staging data structure for messages about to be put on the wire /// /// This structure corresponds to FlightData in the protocol. @@ -664,217 +899,30 @@ struct ARROW_FLIGHT_EXPORT FlightPayload { Status Validate() const; }; -/// \brief Schema result returned after a schema request RPC -struct ARROW_FLIGHT_EXPORT SchemaResult : public internal::BaseType { - public: - SchemaResult() = default; - explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {} - - /// \brief Factory method to construct a SchemaResult. - static arrow::Result> Make(const Schema& schema); +// A wrapper around arrow.flight.protocol.PutResult is not defined - /// \brief return schema - /// \param[in,out] dictionary_memo for dictionary bookkeeping, will - /// be modified - /// \return Arrow result with the reconstructed Schema - arrow::Result> GetSchema( - ipc::DictionaryMemo* dictionary_memo) const; +// Session management messages - const std::string& serialized_schema() const { return raw_schema_; } - - std::string ToString() const; - bool Equals(const SchemaResult& other) const; - - using SuperT::Deserialize; - using SuperT::SerializeToString; - - /// \brief Serialize this message to its wire-format representation. - /// - /// Use `SerializeToString()` if you want a Result-returning version. - arrow::Status SerializeToString(std::string* out) const; - - /// \brief Deserialize this message from its wire-format representation. - /// - /// Use `Deserialize(serialized)` if you want a Result-returning version. 
-  static arrow::Status Deserialize(std::string_view serialized, SchemaResult* out);
-
- private:
-  std::string raw_schema_;
-};
-
-/// \brief The access coordinates for retrieval of a dataset, returned by
-/// GetFlightInfo
-class ARROW_FLIGHT_EXPORT FlightInfo
-    : public internal::BaseType<FlightInfo, std::unique_ptr<FlightInfo>> {
- public:
-  struct Data {
-    std::string schema;
-    FlightDescriptor descriptor;
-    std::vector<FlightEndpoint> endpoints;
-    int64_t total_records = -1;
-    int64_t total_bytes = -1;
-    bool ordered = false;
-    std::string app_metadata;
-  };
-
-  explicit FlightInfo(Data data) : data_(std::move(data)), reconstructed_schema_(false) {}
-
-  /// \brief Factory method to construct a FlightInfo.
-  static arrow::Result<FlightInfo> Make(const Schema& schema,
-                                        const FlightDescriptor& descriptor,
-                                        const std::vector<FlightEndpoint>& endpoints,
-                                        int64_t total_records, int64_t total_bytes,
-                                        bool ordered = false,
-                                        std::string app_metadata = "");
-
-  /// \brief Deserialize the Arrow schema of the dataset. Populate any
-  ///     dictionary encoded fields into a DictionaryMemo for
-  ///     bookkeeping
-  /// \param[in,out] dictionary_memo for dictionary bookkeeping, will
-  ///     be modified
-  /// \return Arrow result with the reconstructed Schema
-  arrow::Result<std::shared_ptr<Schema>> GetSchema(
-      ipc::DictionaryMemo* dictionary_memo) const;
-
-  const std::string& serialized_schema() const { return data_.schema; }
-
-  /// The descriptor associated with this flight, may not be set
-  const FlightDescriptor& descriptor() const { return data_.descriptor; }
-
-  /// A list of endpoints associated with the flight (dataset). To consume the
-  /// whole flight, all endpoints must be consumed
-  const std::vector<FlightEndpoint>& endpoints() const { return data_.endpoints; }
-
-  /// The total number of records (rows) in the dataset. If unknown, set to -1
-  int64_t total_records() const { return data_.total_records; }
-
-  /// The total number of bytes in the dataset. If unknown, set to -1
-  int64_t total_bytes() const { return data_.total_bytes; }
-
-  /// Whether endpoints are in the same order as the data.
-  bool ordered() const { return data_.ordered; }
-
-  /// Application-defined opaque metadata
-  const std::string& app_metadata() const { return data_.app_metadata; }
-
-  using SuperT::Deserialize;
-  using SuperT::SerializeToString;
-
-  /// \brief Get the wire-format representation of this type.
-  ///
-  /// Useful when interoperating with non-Flight systems (e.g. REST
-  /// services) that may want to return Flight types.
-  ///
-  /// Use `SerializeToString()` if you want a Result-returning version.
-  arrow::Status SerializeToString(std::string* out) const;
-
-  /// \brief Parse the wire-format representation of this type.
-  ///
-  /// Useful when interoperating with non-Flight systems (e.g. REST
-  /// services) that may want to return Flight types.
-  ///
-  /// Use `Deserialize(serialized)` if you want a Result-returning version.
-  static arrow::Status Deserialize(std::string_view serialized,
-                                   std::unique_ptr<FlightInfo>* out);
-
-  std::string ToString() const;
-
-  /// Compare two FlightInfo for equality. This will compare the
-  /// serialized schema representations, NOT the logical equality of
-  /// the schemas.
-  bool Equals(const FlightInfo& other) const;
-
- private:
-  Data data_;
-  mutable std::shared_ptr<Schema> schema_;
-  mutable bool reconstructed_schema_;
-};
-
-/// \brief The information to process a long-running query.
-class ARROW_FLIGHT_EXPORT PollInfo
-    : public internal::BaseType<PollInfo, std::unique_ptr<PollInfo>> {
- public:
-  /// The currently available results so far.
-  std::unique_ptr<FlightInfo> info = NULLPTR;
-  /// The descriptor the client should use on the next try. If unset,
-  /// the query is complete.
-  std::optional<FlightDescriptor> descriptor = std::nullopt;
-  /// Query progress. Must be in [0.0, 1.0] but need not be
-  /// monotonic or nondecreasing. If unknown, do not set.
-  std::optional<double> progress = std::nullopt;
-  /// Expiration time for this request. After this passes, the server
-  /// might not accept the poll descriptor anymore (and the query may
-  /// be cancelled). This may be updated on a call to PollFlightInfo.
-  std::optional<Timestamp> expiration_time = std::nullopt;
-
-  PollInfo()
-      : info(NULLPTR),
-        descriptor(std::nullopt),
-        progress(std::nullopt),
-        expiration_time(std::nullopt) {}
-
-  PollInfo(std::unique_ptr<FlightInfo> info, std::optional<FlightDescriptor> descriptor,
-           std::optional<double> progress, std::optional<Timestamp> expiration_time)
-      : info(std::move(info)),
-        descriptor(std::move(descriptor)),
-        progress(progress),
-        expiration_time(expiration_time) {}
-
-  PollInfo(const PollInfo& other)
-      : info(other.info ? std::make_unique<FlightInfo>(*other.info) : NULLPTR),
-        descriptor(other.descriptor),
-        progress(other.progress),
-        expiration_time(other.expiration_time) {}
-  PollInfo(PollInfo&& other) noexcept = default;
-  ~PollInfo() = default;
-  PollInfo& operator=(const PollInfo& other) {
-    info = other.info ? std::make_unique<FlightInfo>(*other.info) : NULLPTR;
-    descriptor = other.descriptor;
-    progress = other.progress;
-    expiration_time = other.expiration_time;
-    return *this;
-  }
-  PollInfo& operator=(PollInfo&& other) = default;
-
-  using SuperT::Deserialize;
-  using SuperT::SerializeToString;
-
-  /// \brief Get the wire-format representation of this type.
-  ///
-  /// Useful when interoperating with non-Flight systems (e.g. REST
-  /// services) that may want to return Flight types.
-  ///
-  /// Use `SerializeToString()` if you want a Result-returning version.
-  arrow::Status SerializeToString(std::string* out) const;
-
-  /// \brief Parse the wire-format representation of this type.
-  ///
-  /// Useful when interoperating with non-Flight systems (e.g. REST
-  /// services) that may want to return Flight types.
-  ///
-  /// Use `Deserialize(serialized)` if you want a Result-returning version.
-  static arrow::Status Deserialize(std::string_view serialized,
-                                   std::unique_ptr<PollInfo>* out);
-
-  std::string ToString() const;
-
-  /// Compare two PollInfo for equality. This will compare the
-  /// serialized schema representations, NOT the logical equality of
-  /// the schemas.
-  bool Equals(const PollInfo& other) const;
-};
+/// \brief Variant supporting all possible value types for {Set,Get}SessionOptions
+///
+/// By convention, an attempt to set a valueless (std::monostate) SessionOptionValue
+/// should attempt to unset or clear the named option value on the server.
+using SessionOptionValue = std::variant<std::monostate, std::string, bool, int64_t,
+                                        double, std::vector<std::string>>;
+std::ostream& operator<<(std::ostream& os, const SessionOptionValue& v);
 
-/// \brief The request of the CancelFlightInfoRequest action.
-struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest
-    : public internal::BaseType<CancelFlightInfoRequest> {
-  std::unique_ptr<FlightInfo> info;
+/// \brief A request to set a set of session options by name/value.
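+///
+/// For illustration only (the option names below are hypothetical), a
+/// request could be built as:
+///
+///     std::map<std::string, SessionOptionValue> options;
+///     options["catalog"] = std::string("main");
+///     options["stale_option"] = SessionOptionValue{};  // monostate: unset
+///     SetSessionOptionsRequest request(std::move(options));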
+struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest
+    : public internal::BaseType<SetSessionOptionsRequest> {
+  std::map<std::string, SessionOptionValue> session_options;
 
-  CancelFlightInfoRequest() = default;
-  CancelFlightInfoRequest(std::unique_ptr<FlightInfo> info)  // NOLINT runtime/explicit
-      : info(std::move(info)) {}
+  SetSessionOptionsRequest() = default;
+  explicit SetSessionOptionsRequest(
+      std::map<std::string, SessionOptionValue> session_options)
+      : session_options(std::move(session_options)) {}
 
   std::string ToString() const;
-  bool Equals(const CancelFlightInfoRequest& other) const;
+  bool Equals(const SetSessionOptionsRequest& other) const;
 
   using SuperT::Deserialize;
   using SuperT::SerializeToString;
@@ -888,16 +936,9 @@ struct ARROW_FLIGHT_EXPORT CancelFlightInfoRequest
   ///
   /// Use `Deserialize(serialized)` if you want a Result-returning version.
   static arrow::Status Deserialize(std::string_view serialized,
-                                   CancelFlightInfoRequest* out);
+                                   SetSessionOptionsRequest* out);
 };
 
-/// \brief Variant supporting all possible value types for {Set,Get}SessionOptions
-///
-/// By convention, an attempt to set a valueless (std::monostate) SessionOptionValue
-/// should attempt to unset or clear the named option value on the server.
-using SessionOptionValue = std::variant<std::monostate, std::string, bool, int64_t,
-                                        double, std::vector<std::string>>;
-
 /// \brief The result of setting a session option.
 enum class SetSessionOptionErrorValue : int8_t {
   /// \brief The status of setting the option is unknown.
@@ -915,57 +956,6 @@ enum class SetSessionOptionErrorValue : int8_t {
 std::string ToString(const SetSessionOptionErrorValue& error_value);
 std::ostream& operator<<(std::ostream& os,
                          const SetSessionOptionErrorValue& error_value);
-/// \brief The result of closing a session.
-enum class CloseSessionStatus : int8_t {
-  // \brief The session close status is unknown.
-  //
-  // Servers should avoid using this value (send a NOT_FOUND error if the requested
-  // session is not known). Clients can retry the request.
-  kUnspecified,
-  // \brief The session close request is complete.
-  //
-  // Subsequent requests with the same session produce a NOT_FOUND error.
-  kClosed,
-  // \brief The session close request is in progress.
-  //
-  // The client may retry the request.
-  kClosing,
-  // \brief The session is not closeable.
-  //
-  // The client should not retry the request.
-  kNotClosable
-};
-std::string ToString(const CloseSessionStatus& status);
-std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status);
-
-/// \brief A request to set a set of session options by name/value.
-struct ARROW_FLIGHT_EXPORT SetSessionOptionsRequest
-    : public internal::BaseType<SetSessionOptionsRequest> {
-  std::map<std::string, SessionOptionValue> session_options;
-
-  SetSessionOptionsRequest() = default;
-  explicit SetSessionOptionsRequest(
-      std::map<std::string, SessionOptionValue> session_options)
-      : session_options(std::move(session_options)) {}
-
-  std::string ToString() const;
-  bool Equals(const SetSessionOptionsRequest& other) const;
-
-  using SuperT::Deserialize;
-  using SuperT::SerializeToString;
-
-  /// \brief Serialize this message to its wire-format representation.
-  ///
-  /// Use `SerializeToString()` if you want a Result-returning version.
-  arrow::Status SerializeToString(std::string* out) const;
-
-  /// \brief Deserialize this message from its wire-format representation.
-  ///
-  /// Use `Deserialize(serialized)` if you want a Result-returning version.
-  static arrow::Status Deserialize(std::string_view serialized,
-                                   SetSessionOptionsRequest* out);
-};
-
 /// \brief The result(s) of setting session option(s).
 struct ARROW_FLIGHT_EXPORT SetSessionOptionsResult
     : public internal::BaseType<SetSessionOptionsResult> {
@@ -1078,6 +1068,29 @@ struct ARROW_FLIGHT_EXPORT CloseSessionRequest
   static arrow::Status Deserialize(std::string_view serialized,
                                    CloseSessionRequest* out);
 };
 
+/// \brief The result of closing a session.
+enum class CloseSessionStatus : int8_t {
+  // \brief The session close status is unknown.
+  //
+  // Servers should avoid using this value (send a NOT_FOUND error if the requested
+  // session is not known). Clients can retry the request.
+  kUnspecified,
+  // \brief The session close request is complete.
+  //
+  // Subsequent requests with the same session produce a NOT_FOUND error.
+  kClosed,
+  // \brief The session close request is in progress.
+  //
+  // The client may retry the request.
+  kClosing,
+  // \brief The session is not closeable.
+  //
+  // The client should not retry the request.
+  kNotClosable
+};
+std::string ToString(const CloseSessionStatus& status);
+std::ostream& operator<<(std::ostream& os, const CloseSessionStatus& status);
+
 /// \brief The result of attempting to close the client session.
 struct ARROW_FLIGHT_EXPORT CloseSessionResult
     : public internal::BaseType<CloseSessionResult> {
@@ -1104,6 +1117,8 @@ struct ARROW_FLIGHT_EXPORT CloseSessionResult
   static arrow::Status Deserialize(std::string_view serialized, CloseSessionResult* out);
 };
 
+//------------------------------------------------------------
+
 /// \brief An iterator to FlightInfo instances returned by ListFlights.
 class ARROW_FLIGHT_EXPORT FlightListing {
  public:
diff --git a/format/Flight.proto b/format/Flight.proto
index 4963e8c09ae47..2187a51ed48f4 100644
--- a/format/Flight.proto
+++ b/format/Flight.proto
@@ -208,24 +208,6 @@ message Action {
   bytes body = 2;
 }
 
-/*
- * The request of the CancelFlightInfo action.
- *
- * The request should be stored in Action.body.
- */
-message CancelFlightInfoRequest {
-  FlightInfo info = 1;
-}
-
-/*
- * The request of the RenewFlightEndpoint action.
- *
- * The request should be stored in Action.body.
- */
-message RenewFlightEndpointRequest {
-  FlightEndpoint endpoint = 1;
-}
-
 /*
  * An opaque result returned after executing an action.
  */
@@ -233,36 +215,6 @@ message Result {
   bytes body = 1;
 }
 
-/*
- * The result of a cancel operation.
- *
- * This is used by CancelFlightInfoResult.status.
- */
-enum CancelStatus {
-  // The cancellation status is unknown. Servers should avoid using
-  // this value (send a NOT_FOUND error if the requested query is
-  // not known). Clients can retry the request.
-  CANCEL_STATUS_UNSPECIFIED = 0;
-  // The cancellation request is complete. Subsequent requests with
-  // the same payload may return CANCELLED or a NOT_FOUND error.
-  CANCEL_STATUS_CANCELLED = 1;
-  // The cancellation request is in progress. The client may retry
-  // the cancellation request.
-  CANCEL_STATUS_CANCELLING = 2;
-  // The query is not cancellable. The client should not retry the
-  // cancellation request.
-  CANCEL_STATUS_NOT_CANCELLABLE = 3;
-}
-
-/*
- * The result of the CancelFlightInfo action.
- *
- * The result should be stored in Result.body.
- */
-message CancelFlightInfoResult {
-  CancelStatus status = 1;
-}
-
 /*
  * Wrap the result of a getSchema call
  */
@@ -423,6 +375,64 @@ message PollInfo {
   google.protobuf.Timestamp expiration_time = 4;
 }
 
+/*
+ * The request of the CancelFlightInfo action.
+ *
+ * The request should be stored in Action.body.
+ */
+message CancelFlightInfoRequest {
+  FlightInfo info = 1;
+}
+
+/*
+ * The result of a cancel operation.
+ *
+ * This is used by CancelFlightInfoResult.status.
+ */
+enum CancelStatus {
+  // The cancellation status is unknown. Servers should avoid using
+  // this value (send a NOT_FOUND error if the requested query is
+  // not known). Clients can retry the request.
+  CANCEL_STATUS_UNSPECIFIED = 0;
+  // The cancellation request is complete. Subsequent requests with
+  // the same payload may return CANCELLED or a NOT_FOUND error.
+  CANCEL_STATUS_CANCELLED = 1;
+  // The cancellation request is in progress. The client may retry
+  // the cancellation request.
+  CANCEL_STATUS_CANCELLING = 2;
+  // The query is not cancellable. The client should not retry the
+  // cancellation request.
+  CANCEL_STATUS_NOT_CANCELLABLE = 3;
+}
+
+/*
+ * The result of the CancelFlightInfo action.
+ *
+ * The result should be stored in Result.body.
+ */
+message CancelFlightInfoResult {
+  CancelStatus status = 1;
+}
+
+/*
+ * An opaque identifier that the service can use to retrieve a particular
+ * portion of a stream.
+ *
+ * Tickets are meant to be single use. It is an error/application-defined
+ * behavior to reuse a ticket.
+ */
+message Ticket {
+  bytes ticket = 1;
+}
+
+/*
+ * A location where a Flight service will accept retrieval of a particular
+ * stream given a ticket.
+ */
+message Location {
+  string uri = 1;
+}
+
 /*
  * A particular stream or split associated with a flight.
  */
@@ -475,22 +485,12 @@ message FlightEndpoint {
 }
 
 /*
- * A location where a Flight service will accept retrieval of a particular
- * stream given a ticket.
- */
-message Location {
-  string uri = 1;
-}
-
-/*
- * An opaque identifier that the service can use to retrieve a particular
- * portion of a stream.
+ * The request of the RenewFlightEndpoint action.
  *
- * Tickets are meant to be single use. It is an error/application-defined
- * behavior to reuse a ticket.
+ * The request should be stored in Action.body.
  */
-message Ticket {
-  bytes ticket = 1;
+message RenewFlightEndpointRequest {
+  FlightEndpoint endpoint = 1;
 }
 
 /*

From d21a924f3012c1e589a3393ebae2c78ee290ba5c Mon Sep 17 00:00:00 2001
From: Alkis Evlogimenos
Date: Mon, 22 Jul 2024 17:55:28 +0300
Subject: [PATCH 106/122] GH-42102: [C++][Parquet] Add binary that extracts a
 footer from a parquet file (#42174)

### Rationale for this change

This binary will make it a lot easier for customers to share their parquet
metadata with the community, so that we can build a repository of footers
that can be used for advancing the state of metadata in parquet.

### What changes are included in this PR?

Usage text from the binary itself:
```
Usage: parquet-dump-footer
  -h|--help      Print help and exit
  --no-scrub     Do not scrub potentially confidential metadata
  --debug        Output text representation of footer for inspection
  --in           Input file (required): must be a URI or an absolute local path
  --out          Output file (optional, default stdout)

  Dump the footer of a Parquet file to stdout or to a file, optionally with
  potentially confidential metadata scrubbed.
```

### Are these changes tested?

Manually on existing parquet files.

### Are there any user-facing changes?

No.
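Example invocation for reviewers who want to try it (the paths here are
illustrative):
```
parquet-dump-footer --in /tmp/example.parquet --out /tmp/example.footer
```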
* GitHub Issue: #42102

Lead-authored-by: Alkis Evlogimenos
Co-authored-by: Antoine Pitrou
Signed-off-by: Antoine Pitrou
---
 cpp/src/parquet/metadata.cc              |  65 +++++++++++
 cpp/src/parquet/metadata.h               |   7 ++
 cpp/tools/parquet/CMakeLists.txt         |   3 +-
 cpp/tools/parquet/parquet_dump_footer.cc | 135 +++++++++++++++++++++++
 4 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 cpp/tools/parquet/parquet_dump_footer.cc

diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index ee8391818962c..7bab9104619ce 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -21,6 +21,8 @@
 #include <algorithm>
 #include <cinttypes>
 #include <ostream>
+#include <random>
+#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
@@ -29,6 +31,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/pcg_random.h"
 #include "parquet/encryption/encryption_internal.h"
 #include "parquet/encryption/internal_file_decryptor.h"
 #include "parquet/exception.h"
@@ -599,6 +602,49 @@ std::vector<SortingColumn> RowGroupMetaData::sorting_columns() const {
   return impl_->sorting_columns();
 }
 
+// Replace string data with randomly generated uppercase characters
+static void Scrub(std::string* s) {
+  static ::arrow::random::pcg64 rng;
+  std::uniform_int_distribution<> caps('A', 'Z');
+  for (auto& c : *s) c = caps(rng);
+}
+
+// Replace potentially sensitive metadata with random data
+static void Scrub(format::FileMetaData* md) {
+  for (auto& s : md->schema) {
+    Scrub(&s.name);
+  }
+  for (auto& r : md->row_groups) {
+    for (auto& c : r.columns) {
+      Scrub(&c.file_path);
+      if (c.__isset.meta_data) {
+        auto& m = c.meta_data;
+        for (auto& p : m.path_in_schema) Scrub(&p);
+        for (auto& kv : m.key_value_metadata) {
+          Scrub(&kv.key);
+          Scrub(&kv.value);
+        }
+        Scrub(&m.statistics.max_value);
+        Scrub(&m.statistics.min_value);
+        Scrub(&m.statistics.min);
+        Scrub(&m.statistics.max);
+      }
+
+      if (c.crypto_metadata.__isset.ENCRYPTION_WITH_COLUMN_KEY) {
+        auto& m = c.crypto_metadata.ENCRYPTION_WITH_COLUMN_KEY;
+        for (auto& p : m.path_in_schema) Scrub(&p);
+        Scrub(&m.key_metadata);
+      }
+      Scrub(&c.encrypted_column_metadata);
+    }
+  }
+  for (auto& kv : md->key_value_metadata) {
+    Scrub(&kv.key);
+    Scrub(&kv.value);
+  }
+  Scrub(&md->footer_signing_key_metadata);
+}
+
 // file metadata
 class FileMetaData::FileMetaDataImpl {
  public:
@@ -821,6 +867,21 @@ class FileMetaData::FileMetaDataImpl {
     return out;
   }
 
+  std::string SerializeUnencrypted(bool scrub, bool debug) const {
+    auto md = *metadata_;
+    if (scrub) Scrub(&md);
+    if (debug) {
+      std::ostringstream ss;
+      md.printTo(ss);
+      return ss.str();
+    } else {
+      ThriftSerializer serializer;
+      std::string out;
+      serializer.SerializeToString(&md, &out);
+      return out;
+    }
+  }
+
   void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor) {
     file_decryptor_ = std::move(file_decryptor);
   }
@@ -992,6 +1053,10 @@ std::shared_ptr<FileMetaData> FileMetaData::Subset(
   return impl_->Subset(row_groups);
 }
 
+std::string FileMetaData::SerializeUnencrypted(bool scrub, bool debug) const {
+  return impl_->SerializeUnencrypted(scrub, debug);
+}
+
 void FileMetaData::WriteTo(::arrow::io::OutputStream* dst,
                            const std::shared_ptr<Encryptor>& encryptor) const {
   return impl_->WriteTo(dst, encryptor);
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index 9fc30df58e0d3..e02d2e7c852f0 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -396,6 +396,13 @@ class PARQUET_EXPORT FileMetaData {
   /// FileMetaData.
   std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) const;
 
+  /// \brief Serialize metadata unencrypted as string
+  ///
+  /// \param[in] scrub whether to remove sensitive information from the metadata.
+  /// \param[in] debug whether to serialize the metadata as Thrift (if false) or
+  ///   debug text (if true).
+  std::string SerializeUnencrypted(bool scrub, bool debug) const;
+
  private:
   friend FileMetaDataBuilder;
   friend class SerializedFile;
diff --git a/cpp/tools/parquet/CMakeLists.txt b/cpp/tools/parquet/CMakeLists.txt
index 81ab49421d0f6..e05645da28a0e 100644
--- a/cpp/tools/parquet/CMakeLists.txt
+++ b/cpp/tools/parquet/CMakeLists.txt
@@ -16,7 +16,7 @@
 # under the License.
 
 if(PARQUET_BUILD_EXECUTABLES)
-  set(PARQUET_TOOLS parquet-dump-schema parquet-reader parquet-scan)
+  set(PARQUET_TOOLS parquet-dump-footer parquet-dump-schema parquet-reader parquet-scan)
 
   foreach(TOOL ${PARQUET_TOOLS})
     string(REGEX REPLACE "-" "_" TOOL_SOURCE ${TOOL})
@@ -31,6 +31,7 @@ if(PARQUET_BUILD_EXECUTABLES)
     install(TARGETS ${TOOL} ${INSTALL_IS_OPTIONAL}
             RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
   endforeach(TOOL)
+  target_link_libraries(parquet-dump-footer ${ARROW_LIBRARIES})
 
   add_dependencies(parquet ${PARQUET_TOOLS})
 endif()
diff --git a/cpp/tools/parquet/parquet_dump_footer.cc b/cpp/tools/parquet/parquet_dump_footer.cc
new file mode 100644
index 0000000000000..c7a4b78fdd823
--- /dev/null
+++ b/cpp/tools/parquet/parquet_dump_footer.cc
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <optional>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/ubsan.h"
+#include "parquet/metadata.h"
+
+namespace parquet {
+namespace {
+uint32_t ReadLE32(const void* p) {
+  uint32_t x = ::arrow::util::SafeLoadAs<uint32_t>(static_cast<const uint8_t*>(p));
+  return ::arrow::bit_util::FromLittleEndian(x);
+}
+
+void AppendLE32(uint32_t v, std::string* out) {
+  v = ::arrow::bit_util::ToLittleEndian(v);
+  out->append(reinterpret_cast<const char*>(&v), sizeof(v));
+}
+
+int DoIt(std::string in, bool scrub, bool debug, std::string out) {
+  std::string path;
+  auto fs = ::arrow::fs::FileSystemFromUriOrPath(in, &path).ValueOrDie();
+  auto file = fs->OpenInputFile(path).ValueOrDie();
+  int64_t file_len = file->GetSize().ValueOrDie();
+  if (file_len < 8) {
+    std::cerr << "File too short: " << in << "\n";
+    return 3;
+  }
+  // First do an opportunistic read of up to 1 MiB to try and get the entire footer.
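+  // Layout of the end of a Parquet file:
+  //   [Thrift-serialized FileMetaData][4-byte little-endian metadata length]["PAR1"]
+  // so the trailing 8 bytes tell us whether the footer fit in this read.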
+  int64_t tail_len = std::min(file_len, int64_t{1} << 20);
+  std::string tail;
+  tail.resize(tail_len);
+  char* data = tail.data();
+  file->ReadAt(file_len - tail_len, tail_len, data).ValueOrDie();
+  if (auto magic = ReadLE32(data + tail_len - 4); magic != ReadLE32("PAR1")) {
+    std::cerr << "Not a Parquet file: " << in << "\n";
+    return 4;
+  }
+  uint32_t metadata_len = ReadLE32(data + tail_len - 8);
+  if (tail_len >= metadata_len + 8) {
+    // The footer is entirely in the initial read. Trim to size.
+    tail = tail.substr(tail_len - (metadata_len + 8));
+  } else {
+    // The footer is larger than the initial read; re-read with the exact size.
+    if (static_cast<int64_t>(metadata_len) + 8 > file_len) {
+      std::cerr << "File too short: " << in << "\n";
+      return 5;
+    }
+    tail_len = metadata_len + 8;
+    tail.resize(tail_len);
+    data = tail.data();
+    file->ReadAt(file_len - tail_len, tail_len, data).ValueOrDie();
+  }
+  auto md = FileMetaData::Make(tail.data(), &metadata_len);
+  std::string ser = md->SerializeUnencrypted(scrub, debug);
+  if (!debug) {
+    AppendLE32(static_cast<uint32_t>(ser.size()), &ser);
+    ser.append("PAR1", 4);
+  }
+  std::optional<std::fstream> fout;
+  if (!out.empty()) fout.emplace(out, std::ios::out);
+  std::ostream& os = fout ? *fout : std::cout;
+  if (!os.write(ser.data(), ser.size())) {
+    std::cerr << "Failed to write to output file: " << out << "\n";
+    return 6;
+  }
+
+  return 0;
+}
+}  // namespace
+}  // namespace parquet
+
+static int PrintHelp() {
+  std::cerr << R"(Usage: parquet-dump-footer
+  -h|--help      Print help and exit
+  --no-scrub     Do not scrub potentially confidential metadata
+  --debug        Output text representation of footer for inspection
+  --in           Input file (required): must be a URI or an absolute local path
+  --out          Output file (optional, default stdout)
+
+  Dump the footer of a Parquet file to stdout or to a file, optionally with
+  potentially confidential metadata scrubbed.
+)";
+  return 1;
+}
+
+int main(int argc, char** argv) {
+  bool scrub = true;
+  bool debug = false;
+  std::string in;
+  std::string out;
+  for (int i = 1; i < argc; i++) {
+    char* arg = argv[i];
+    if (!std::strcmp(arg, "-h") || !std::strcmp(arg, "--help")) {
+      return PrintHelp();
+    } else if (!std::strcmp(arg, "--no-scrub")) {
+      scrub = false;
+    } else if (!std::strcmp(arg, "--debug")) {
+      debug = true;
+    } else if (!std::strcmp(arg, "--in")) {
+      if (i + 1 >= argc) return PrintHelp();
+      in = argv[++i];
+    } else if (!std::strcmp(arg, "--out")) {
+      if (i + 1 >= argc) return PrintHelp();
+      out = argv[++i];
+    } else {
+      // Unknown option.
+      return PrintHelp();
+    }
+  }
+  if (in.empty()) return PrintHelp();
+
+  return parquet::DoIt(in, scrub, debug, out);
+}

From 663cbb5fcebb1fc17f311e6f4d71452f14feae5b Mon Sep 17 00:00:00 2001
From: Dane Pitkin
Date: Mon, 22 Jul 2024 11:46:14 -0400
Subject: [PATCH 107/122] MINOR: [Java] Bump org.mockito:mockito-junit-jupiter
 from 2.25.1 to 5.12.0 in /java (#43308)

### Rationale for this change

Bump version now that Java 8 is deprecated. Follow-up PR to
https://github.com/apache/arrow/pull/39408.

### What changes are included in this PR?

* Bump mockito to 5.12.0

### Are these changes tested?

CI

### Are there any user-facing changes?

No

Authored-by: Dane Pitkin
Signed-off-by: Dane Pitkin
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index 4228496ef682f..2e7b57e462690 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -274,7 +274,7 @@ under the License.
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-junit-jupiter</artifactId>
-      <version>2.25.1</version>
+      <version>5.12.0</version>
       <scope>test</scope>
     </dependency>

From efcaa64dbea3e86fd9d6fa21d3aabd17f3667d35 Mon Sep 17 00:00:00 2001
From: Dane Pitkin
Date: Mon, 22 Jul 2024 11:46:46 -0400
Subject: [PATCH 108/122] MINOR: [Java] Bump org.apache.derby:derby from
 10.14.2.0 to 10.15.2.0 in /java (#43309)

### Rationale for this change

Bump to the latest version that supports Java 11. See the Apache Derby support
matrix here: https://db.apache.org/derby/derby_downloads.html

### What changes are included in this PR?

* Bump derby to 10.15.2.0

### Are these changes tested?

CI

### Are there any user-facing changes?

No

Authored-by: Dane Pitkin
Signed-off-by: Dane Pitkin
---
 java/flight/flight-sql/pom.xml                               | 2 +-
 .../java/org/apache/arrow/flight/sql/test/TestFlightSql.java | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml
index fc6c789ee99d6..81bdc1a25636a 100644
--- a/java/flight/flight-sql/pom.xml
+++ b/java/flight/flight-sql/pom.xml
@@ -87,7 +87,7 @@ under the License.
     <dependency>
       <groupId>org.apache.derby</groupId>
       <artifactId>derby</artifactId>
-      <version>10.14.2.0</version>
+      <version>10.15.2.0</version>
       <scope>test</scope>
     </dependency>
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
index 8387834947283..2eb74adc5bc0e 100644
--- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
@@ -123,10 +123,10 @@ protected static void setUpExpectedResultsMap() {
         Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP.put(
         Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE),
-        "10.14.2.0 - (1828579)");
+        "10.15.2.0 - (1873585)");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP.put(
         Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE),
-        "10.14.2.0 - (1828579)");
+        "10.15.2.0 - (1873585)");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP.put(
         Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP.put(

From ec58e4de9e08e4398e293cde70a7fed5bfb3ba5c Mon Sep 17 00:00:00 2001
From: Dane Pitkin
Date: Mon, 22 Jul 2024 12:53:57 -0400
Subject: [PATCH 109/122] MINOR: [Java] Bump ch.qos.logback:logback-classic
 from 1.3.14 to 1.4.14 in /java (#43310)

### Rationale for this change

With Java 8 deprecated, we can bump logback to the next version, which
supports Java 11 at build and runtime. See
https://github.com/apache/arrow/pull/40778/files

### What changes are included in this PR?

* Bump logback-classic to 1.4.14

### Are these changes tested?

CI

### Are there any user-facing changes?

No

Authored-by: Dane Pitkin
Signed-off-by: Dane Pitkin
---
 java/pom.xml       | 2 +-
 java/tools/pom.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/pom.xml b/java/pom.xml
index 2e7b57e462690..45acf9dd0c732 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -280,7 +280,7 @@ under the License.
     <dependency>
       <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
-      <version>1.3.14</version>
+      <version>1.4.14</version>
       <scope>test</scope>
     </dependency>
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 9c52e21402bcd..94566495dff19 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -59,7 +59,7 @@ under the License.
     <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
-      <version>1.3.14</version>
+      <version>1.4.14</version>
       <scope>test</scope>