Skip to content

Commit

Permalink
Merge branch 'apache:master' into large-offsets-ipc
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroshade authored Aug 2, 2022
2 parents e3ab876 + a9dcaff commit af0b9c1
Show file tree
Hide file tree
Showing 27 changed files with 714 additions and 279 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,14 @@ jobs:
shell: Rscript {0}
working-directory: r
run: |
Sys.setenv(
RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "r", "windows", "libarrow.zip"),
MAKEFLAGS = paste0("-j", parallel::detectCores()),
ARROW_R_DEV = TRUE,
"_R_CHECK_FORCE_SUGGESTS_" = FALSE
)
# we use pak for package installation since it is faster, safer and more convenient
pak::local_install()
pak::pak("lintr")
lintr::expect_lint_free()
- name: Dump install logs
Expand Down
13 changes: 2 additions & 11 deletions c_glib/test/test-list-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,8 @@ def test_equal
end

def test_to_s
assert_equal(<<-LIST.strip, @scalar.to_s)
[
[
[
1,
2,
3
]
]
]
LIST
assert_equal("list<item: list<value: int8>>[list<value: int8>[1, 2, 3]]",
@scalar.to_s)
end

def test_value
Expand Down
13 changes: 1 addition & 12 deletions c_glib/test/test-map-scalar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,7 @@ def test_equal

def test_to_s
assert_equal(<<-MAP.strip, @scalar.to_s)
[
keys:
[
"hello",
"world"
]
values:
[
1,
2
]
]
map<string, int8>[{key:string = hello, value:int8 = 1}, {key:string = world, value:int8 = 2}]
MAP
end

Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1114,13 +1114,17 @@ Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* siz
RETURN_IF_PYERROR();
for (i = 0; i < n; i++) {
PyObject* item = PyIter_Next(iter);
if (!item) break;
if (!item) {
// either an error occurred or the iterator ended
RETURN_IF_PYERROR();
break;
}
PyList_SET_ITEM(lst, i, item);
}
// Shrink list if len(iterator) < size
if (i < n && PyList_SetSlice(lst, i, n, NULL)) {
Py_DECREF(lst);
return Status::UnknownError("failed to resize list");
RETURN_IF_PYERROR();
}
*seq = lst;
*size = std::min<int64_t>(i, *size);
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,20 @@ Status CastImpl(const StructScalar& from, StringScalar* to) {
return Status::OK();
}

// Stringify a list-like scalar (list, large list, map, fixed-size list).
// The result is the type's string form followed by the bracketed,
// comma-separated string form of each element, e.g.
// "list<item: int16>[1, 2, null]".
Status CastImpl(const BaseListScalar& from, StringScalar* to) {
  std::stringstream out;
  out << from.type->ToString() << "[";
  const int64_t num_items = from.value->length();
  for (int64_t pos = 0; pos < num_items; ++pos) {
    // Separator goes before every element except the first.
    if (pos != 0) {
      out << ", ";
    }
    ARROW_ASSIGN_OR_RAISE(auto element, from.value->GetScalar(pos));
    out << element->ToString();
  }
  out << ']';
  to->value = Buffer::FromString(out.str());
  return Status::OK();
}

Status CastImpl(const UnionScalar& from, StringScalar* to) {
const auto& union_ty = checked_cast<const UnionType&>(*from.type);
std::stringstream ss;
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/scalar_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1049,11 +1049,15 @@ class TestListScalar : public ::testing::Test {
ASSERT_OK(scalar.ValidateFull());
ASSERT_TRUE(scalar.is_valid);
AssertTypeEqual(scalar.type, type_);
// list<item: int16>[1, 2, null]
ASSERT_THAT(scalar.ToString(), ::testing::AllOf(::testing::HasSubstr("item: int16"),
::testing::EndsWith("[1, 2, null]")));

auto null_scalar = CheckMakeNullScalar(type_);
ASSERT_OK(null_scalar->ValidateFull());
ASSERT_FALSE(null_scalar->is_valid);
AssertTypeEqual(null_scalar->type, type_);
ASSERT_EQ(null_scalar->ToString(), "null");
}

void TestValidateErrors() {
Expand Down
33 changes: 24 additions & 9 deletions dev/archery/archery/integration/tester_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ def load_version_from_pom():
),
)
_ARROW_FLIGHT_SERVER = (
"org.apache.arrow.flight.integration.tests." "IntegrationTestServer"
"org.apache.arrow.flight.integration.tests.IntegrationTestServer"
)
_ARROW_FLIGHT_CLIENT = (
"org.apache.arrow.flight.integration.tests." "IntegrationTestClient"
"org.apache.arrow.flight.integration.tests.IntegrationTestClient"
)


Expand All @@ -70,10 +70,24 @@ class JavaTester(Tester):

name = 'Java'

# Build the per-instance list of JVM options used by every `java` command
# this tester launches. All positional and keyword arguments are forwarded
# unchanged to the parent class constructor.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Detect whether we're on Java 8 or Java 9+.
# Work on a slice copy so appending below only affects this instance's
# options (presumably _JAVA_OPTS is a module-level list -- confirm).
self._java_opts = _JAVA_OPTS[:]
# Probe the installed JVM by passing it a bare `--add-opens`; both output
# streams are captured and decoded as text so stderr can be searched.
proc = subprocess.run(
['java', '--add-opens'],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
text=True)
# If the JVM does not reject the option as unrecognized, treat it as
# Java 9+ and add the flag for all subsequent commands.
if 'Unrecognized option: --add-opens' not in proc.stderr:
# Java 9+
self._java_opts.append(
'--add-opens=java.base/java.nio=ALL-UNNAMED')

def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
cmd = (
['java'] +
_JAVA_OPTS +
self._java_opts +
['-cp', _ARROW_TOOLS_JAR, 'org.apache.arrow.tools.Integration']
)

Expand All @@ -98,7 +112,7 @@ def json_to_file(self, json_path, arrow_path):

def stream_to_file(self, stream_path, file_path):
cmd = (
['java'] + _JAVA_OPTS + [
['java'] + self._java_opts + [
'-cp',
_ARROW_TOOLS_JAR,
'org.apache.arrow.tools.StreamToFile',
Expand All @@ -112,7 +126,7 @@ def stream_to_file(self, stream_path, file_path):

def file_to_stream(self, file_path, stream_path):
cmd = (
['java'] + _JAVA_OPTS + [
['java'] + self._java_opts + [
'-cp',
_ARROW_TOOLS_JAR,
'org.apache.arrow.tools.FileToStream',
Expand All @@ -126,9 +140,10 @@ def file_to_stream(self, file_path, stream_path):

def flight_request(self, port, json_path=None, scenario_name=None):
cmd = (
['java'] + _JAVA_OPTS + ['-cp', _ARROW_FLIGHT_JAR,
_ARROW_FLIGHT_CLIENT, '-port', str(port)]
)
['java'] + self._java_opts + [
'-cp', _ARROW_FLIGHT_JAR, _ARROW_FLIGHT_CLIENT, '-port', str(
port)
])

if json_path:
cmd.extend(('-j', json_path))
Expand All @@ -145,7 +160,7 @@ def flight_request(self, port, json_path=None, scenario_name=None):
def flight_server(self, scenario_name=None):
cmd = (
['java'] +
_JAVA_OPTS +
self._java_opts +
['-cp', _ARROW_FLIGHT_JAR, _ARROW_FLIGHT_SERVER, '-port', '0']
)
if scenario_name:
Expand Down
1 change: 1 addition & 0 deletions docs/source/developers/cpp/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ boolean flags to ``cmake``.
filesystems
* ``-DARROW_FLIGHT=ON``: Arrow Flight RPC system, which depends at least on
gRPC
* ``-DARROW_FLIGHT_SQL=ON``: Arrow Flight SQL
* ``-DARROW_GANDIVA=ON``: Gandiva expression compiler, depends on LLVM,
Protocol Buffers, and re2
* ``-DARROW_GANDIVA_JAVA=ON``: Gandiva JNI bindings for Java
Expand Down
157 changes: 157 additions & 0 deletions docs/source/developers/java/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,160 @@ Common Errors
.. _Archery: https://github.com/apache/arrow/blob/master/dev/archery/README.md
.. _Dependency Resolution: https://arrow.apache.org/docs/developers/cpp/building.html#individual-dependency-resolution
.. _C++ shared libraries: https://arrow.apache.org/docs/cpp/build_system.html


Installing Nightly Packages
===========================

.. warning::
These packages are not official releases. Use them at your own risk.

Arrow nightly builds are posted on the mailing list at `[email protected]`_.
The artifacts are uploaded to GitHub. For example, for 2022/07/30, they can be found at `Github Nightly`_.


Installing from Apache Nightlies
--------------------------------
1. Look up the nightly version number for the Arrow libraries used.

For example, for ``arrow-memory``, visit https://nightlies.apache.org/arrow/java/org/apache/arrow/arrow-memory/ and see what versions are available (e.g. 9.0.0.dev501).
2. Add Apache Nightlies Repository to the Maven/Gradle project.

.. code-block:: xml
<properties>
<arrow.version>9.0.0.dev501</arrow.version>
</properties>
...
<repositories>
<repository>
<id>arrow-apache-nightlies</id>
<url>https://nightlies.apache.org/arrow/java</url>
</repository>
</repositories>
...
<dependencies>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
</dependency>
</dependencies>
...
Installing Manually
-------------------

1. Decide which nightly packages repository to use, for example: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars
2. Add packages to your pom.xml, for example: flight-core (it depends on: arrow-format, arrow-vector, arrow-memory-core and arrow-memory-netty).

.. code-block:: xml
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<arrow.version>9.0.0.dev501</arrow.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-core</artifactId>
<version>${arrow.version}</version>
</dependency>
</dependencies>
3. Download the necessary pom and jar files to a temporary directory:

.. code-block:: shell
$ mkdir nightly-packaging-2022-07-30-0-github-java-jars
$ cd nightly-packaging-2022-07-30-0-github-java-jars
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-java-root-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-format-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-format-9.0.0.dev501.jar
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-vector-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-vector-9.0.0.dev501.jar
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-core-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-netty-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-core-9.0.0.dev501.jar
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-netty-9.0.0.dev501.jar
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-flight-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/flight-core-9.0.0.dev501.pom
$ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/flight-core-9.0.0.dev501.jar
$ tree
.
├── arrow-flight-9.0.0.dev501.pom
├── arrow-format-9.0.0.dev501.jar
├── arrow-format-9.0.0.dev501.pom
├── arrow-java-root-9.0.0.dev501.pom
├── arrow-memory-9.0.0.dev501.pom
├── arrow-memory-core-9.0.0.dev501.jar
├── arrow-memory-core-9.0.0.dev501.pom
├── arrow-memory-netty-9.0.0.dev501.jar
├── arrow-memory-netty-9.0.0.dev501.pom
├── arrow-vector-9.0.0.dev501.jar
├── arrow-vector-9.0.0.dev501.pom
├── flight-core-9.0.0.dev501.jar
└── flight-core-9.0.0.dev501.pom
4. Install the artifacts to the local Maven repository with ``mvn install:install-file``:

.. code-block:: shell
$ mvn install:install-file -Dfile="$(pwd)/arrow-java-root-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-java-root -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-format-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-format-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=9.0.0.dev501 -Dpackaging=jar
$ mvn install:install-file -Dfile="$(pwd)/arrow-vector-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-vector-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=9.0.0.dev501 -Dpackaging=jar
$ mvn install:install-file -Dfile="$(pwd)/arrow-memory-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=9.0.0.dev501 -Dpackaging=jar
$ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=9.0.0.dev501 -Dpackaging=jar
$ mvn install:install-file -Dfile="$(pwd)/arrow-flight-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-flight -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/flight-core-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=9.0.0.dev501 -Dpackaging=pom
$ mvn install:install-file -Dfile="$(pwd)/flight-core-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=9.0.0.dev501 -Dpackaging=jar
5. Validate that the packages were installed:

.. code-block:: shell
$ tree ~/.m2/repository/org/apache/arrow
.
├── arrow-flight
│   ├── 9.0.0.dev501
│   │   └── arrow-flight-9.0.0.dev501.pom
├── arrow-format
│   ├── 9.0.0.dev501
│   │   ├── arrow-format-9.0.0.dev501.jar
│   │   └── arrow-format-9.0.0.dev501.pom
├── arrow-java-root
│   ├── 9.0.0.dev501
│   │   └── arrow-java-root-9.0.0.dev501.pom
├── arrow-memory
│   ├── 9.0.0.dev501
│   │   └── arrow-memory-9.0.0.dev501.pom
├── arrow-memory-core
│   ├── 9.0.0.dev501
│   │   ├── arrow-memory-core-9.0.0.dev501.jar
│   │   └── arrow-memory-core-9.0.0.dev501.pom
├── arrow-memory-netty
│   ├── 9.0.0.dev501
│   │   ├── arrow-memory-netty-9.0.0.dev501.jar
│   │   └── arrow-memory-netty-9.0.0.dev501.pom
├── arrow-vector
│   ├── 9.0.0.dev501
│   │   ├── _remote.repositories
│   │   ├── arrow-vector-9.0.0.dev501.jar
│   │   └── arrow-vector-9.0.0.dev501.pom
└── flight-core
├── 9.0.0.dev501
│   ├── flight-core-9.0.0.dev501.jar
│   └── flight-core-9.0.0.dev501.pom
6. Compile your project as usual with ``mvn clean install``.

.. _[email protected]: https://lists.apache.org/[email protected]
.. _Github Nightly: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars
Loading

0 comments on commit af0b9c1

Please sign in to comment.